1""" 2Read and write ZIP files. 3 4XXX references to utf-8 need further investigation. 5""" 6import binascii 7import importlib.util 8import io 9import itertools 10import os 11import posixpath 12import re 13import shutil 14import stat 15import struct 16import sys 17import threading 18import time 19import contextlib 20import pathlib 21 22try: 23 import zlib # We may need its compression method 24 crc32 = zlib.crc32 25except ImportError: 26 zlib = None 27 crc32 = binascii.crc32 28 29try: 30 import bz2 # We may need its compression method 31except ImportError: 32 bz2 = None 33 34try: 35 import lzma # We may need its compression method 36except ImportError: 37 lzma = None 38 39__all__ = ["BadZipFile", "BadZipfile", "error", 40 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA", 41 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", 42 "Path"] 43 44class BadZipFile(Exception): 45 pass 46 47 48class LargeZipFile(Exception): 49 """ 50 Raised when writing a zipfile, the zipfile requires ZIP64 extensions 51 and those extensions are disabled. 52 """ 53 54error = BadZipfile = BadZipFile # Pre-3.2 compatibility names 55 56 57ZIP64_LIMIT = (1 << 31) - 1 58ZIP_FILECOUNT_LIMIT = (1 << 16) - 1 59ZIP_MAX_COMMENT = (1 << 16) - 1 60 61# constants for Zip file compression methods 62ZIP_STORED = 0 63ZIP_DEFLATED = 8 64ZIP_BZIP2 = 12 65ZIP_LZMA = 14 66# Other ZIP compression methods not supported 67 68DEFAULT_VERSION = 20 69ZIP64_VERSION = 45 70BZIP2_VERSION = 46 71LZMA_VERSION = 63 72# we recognize (but not necessarily support) all features up to that version 73MAX_EXTRACT_VERSION = 63 74 75# Below are some formats and associated data for reading/writing headers using 76# the struct module. The names and structures of headers/records are those used 77# in the PKWARE description of the ZIP file format: 78# http://www.pkware.com/documents/casestudies/APPNOTE.TXT 79# (URL valid as of January 2008) 80 81# The "end of central directory" structure, magic number, size, and indices 82# (section V.I in the format document) 83structEndArchive = b"<4s4H2LH" 84stringEndArchive = b"PK\005\006" 85sizeEndCentDir = struct.calcsize(structEndArchive) 86 87_ECD_SIGNATURE = 0 88_ECD_DISK_NUMBER = 1 89_ECD_DISK_START = 2 90_ECD_ENTRIES_THIS_DISK = 3 91_ECD_ENTRIES_TOTAL = 4 92_ECD_SIZE = 5 93_ECD_OFFSET = 6 94_ECD_COMMENT_SIZE = 7 95# These last two indices are not part of the structure as defined in the 96# spec, but they are used internally by this module as a convenience 97_ECD_COMMENT = 8 98_ECD_LOCATION = 9 99 100# The "central directory" structure, magic number, size, and indices 101# of entries in the structure (section V.F in the format document) 102structCentralDir = "<4s4B4HL2L5H2L" 103stringCentralDir = b"PK\001\002" 104sizeCentralDir = struct.calcsize(structCentralDir) 105 106# indexes of entries in the central directory structure 107_CD_SIGNATURE = 0 108_CD_CREATE_VERSION = 1 109_CD_CREATE_SYSTEM = 2 110_CD_EXTRACT_VERSION = 3 111_CD_EXTRACT_SYSTEM = 4 112_CD_FLAG_BITS = 5 113_CD_COMPRESS_TYPE = 6 114_CD_TIME = 7 115_CD_DATE = 8 116_CD_CRC = 9 117_CD_COMPRESSED_SIZE = 10 118_CD_UNCOMPRESSED_SIZE = 11 119_CD_FILENAME_LENGTH = 12 120_CD_EXTRA_FIELD_LENGTH = 13 121_CD_COMMENT_LENGTH = 14 122_CD_DISK_NUMBER_START = 15 123_CD_INTERNAL_FILE_ATTRIBUTES = 16 124_CD_EXTERNAL_FILE_ATTRIBUTES = 17 125_CD_LOCAL_HEADER_OFFSET = 18 126 127# General purpose bit flags 128# Zip Appnote: 4.4.4 general purpose bit flag: (2 bytes) 129_MASK_ENCRYPTED = 1 << 0 130# Bits 1 and 2 have different meanings depending on the compression used. 131_MASK_COMPRESS_OPTION_1 = 1 << 1 132# _MASK_COMPRESS_OPTION_2 = 1 << 2 133# _MASK_USE_DATA_DESCRIPTOR: If set, crc-32, compressed size and uncompressed 134# size are zero in the local header and the real values are written in the data 135# descriptor immediately following the compressed data. 136_MASK_USE_DATA_DESCRIPTOR = 1 << 3 137# Bit 4: Reserved for use with compression method 8, for enhanced deflating. 138# _MASK_RESERVED_BIT_4 = 1 << 4 139_MASK_COMPRESSED_PATCH = 1 << 5 140_MASK_STRONG_ENCRYPTION = 1 << 6 141# _MASK_UNUSED_BIT_7 = 1 << 7 142# _MASK_UNUSED_BIT_8 = 1 << 8 143# _MASK_UNUSED_BIT_9 = 1 << 9 144# _MASK_UNUSED_BIT_10 = 1 << 10 145_MASK_UTF_FILENAME = 1 << 11 146# Bit 12: Reserved by PKWARE for enhanced compression. 147# _MASK_RESERVED_BIT_12 = 1 << 12 148# _MASK_ENCRYPTED_CENTRAL_DIR = 1 << 13 149# Bit 14, 15: Reserved by PKWARE 150# _MASK_RESERVED_BIT_14 = 1 << 14 151# _MASK_RESERVED_BIT_15 = 1 << 15 152 153# The "local file header" structure, magic number, size, and indices 154# (section V.A in the format document) 155structFileHeader = "<4s2B4HL2L2H" 156stringFileHeader = b"PK\003\004" 157sizeFileHeader = struct.calcsize(structFileHeader) 158 159_FH_SIGNATURE = 0 160_FH_EXTRACT_VERSION = 1 161_FH_EXTRACT_SYSTEM = 2 162_FH_GENERAL_PURPOSE_FLAG_BITS = 3 163_FH_COMPRESSION_METHOD = 4 164_FH_LAST_MOD_TIME = 5 165_FH_LAST_MOD_DATE = 6 166_FH_CRC = 7 167_FH_COMPRESSED_SIZE = 8 168_FH_UNCOMPRESSED_SIZE = 9 169_FH_FILENAME_LENGTH = 10 170_FH_EXTRA_FIELD_LENGTH = 11 171 172# The "Zip64 end of central directory locator" structure, magic number, and size 173structEndArchive64Locator = "<4sLQL" 174stringEndArchive64Locator = b"PK\x06\x07" 175sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) 176 177# The "Zip64 end of central directory" record, magic number, size, and indices 178# (section V.G in the format document) 179structEndArchive64 = "<4sQ2H2L4Q" 180stringEndArchive64 = b"PK\x06\x06" 181sizeEndCentDir64 = struct.calcsize(structEndArchive64) 182 183_CD64_SIGNATURE = 0 184_CD64_DIRECTORY_RECSIZE = 1 185_CD64_CREATE_VERSION = 2 186_CD64_EXTRACT_VERSION = 3 187_CD64_DISK_NUMBER = 4 188_CD64_DISK_NUMBER_START = 5 189_CD64_NUMBER_ENTRIES_THIS_DISK = 6 190_CD64_NUMBER_ENTRIES_TOTAL = 7 191_CD64_DIRECTORY_SIZE = 8 192_CD64_OFFSET_START_CENTDIR = 9 193 194_DD_SIGNATURE = 0x08074b50 195 196_EXTRA_FIELD_STRUCT = struct.Struct('<HH') 197 198def _strip_extra(extra, xids): 199 # Remove Extra Fields with specified IDs. 200 unpack = _EXTRA_FIELD_STRUCT.unpack 201 modified = False 202 buffer = [] 203 start = i = 0 204 while i + 4 <= len(extra): 205 xid, xlen = unpack(extra[i : i + 4]) 206 j = i + 4 + xlen 207 if xid in xids: 208 if i != start: 209 buffer.append(extra[start : i]) 210 start = j 211 modified = True 212 i = j 213 if not modified: 214 return extra 215 if start != len(extra): 216 buffer.append(extra[start:]) 217 return b''.join(buffer) 218 219def _check_zipfile(fp): 220 try: 221 if _EndRecData(fp): 222 return True # file has correct magic number 223 except OSError: 224 pass 225 return False 226 227def is_zipfile(filename): 228 """Quickly see if a file is a ZIP file by checking the magic number. 229 230 The filename argument may be a file or file-like object too. 231 """ 232 result = False 233 try: 234 if hasattr(filename, "read"): 235 result = _check_zipfile(fp=filename) 236 else: 237 with open(filename, "rb") as fp: 238 result = _check_zipfile(fp) 239 except OSError: 240 pass 241 return result 242 243def _EndRecData64(fpin, offset, endrec): 244 """ 245 Read the ZIP64 end-of-archive records and use that to update endrec 246 """ 247 try: 248 fpin.seek(offset - sizeEndCentDir64Locator, 2) 249 except OSError: 250 # If the seek fails, the file is not large enough to contain a ZIP64 251 # end-of-archive record, so just return the end record we were given. 252 return endrec 253 254 data = fpin.read(sizeEndCentDir64Locator) 255 if len(data) != sizeEndCentDir64Locator: 256 return endrec 257 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) 258 if sig != stringEndArchive64Locator: 259 return endrec 260 261 if diskno != 0 or disks > 1: 262 raise BadZipFile("zipfiles that span multiple disks are not supported") 263 264 # Assume no 'zip64 extensible data' 265 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) 266 data = fpin.read(sizeEndCentDir64) 267 if len(data) != sizeEndCentDir64: 268 return endrec 269 sig, sz, create_version, read_version, disk_num, disk_dir, \ 270 dircount, dircount2, dirsize, diroffset = \ 271 struct.unpack(structEndArchive64, data) 272 if sig != stringEndArchive64: 273 return endrec 274 275 # Update the original endrec using data from the ZIP64 record 276 endrec[_ECD_SIGNATURE] = sig 277 endrec[_ECD_DISK_NUMBER] = disk_num 278 endrec[_ECD_DISK_START] = disk_dir 279 endrec[_ECD_ENTRIES_THIS_DISK] = dircount 280 endrec[_ECD_ENTRIES_TOTAL] = dircount2 281 endrec[_ECD_SIZE] = dirsize 282 endrec[_ECD_OFFSET] = diroffset 283 return endrec 284 285 286def _EndRecData(fpin): 287 """Return data from the "End of Central Directory" record, or None. 288 289 The data is a list of the nine items in the ZIP "End of central dir" 290 record followed by a tenth item, the file seek offset of this record.""" 291 292 # Determine file size 293 fpin.seek(0, 2) 294 filesize = fpin.tell() 295 296 # Check to see if this is ZIP file with no archive comment (the 297 # "end of central directory" structure should be the last item in the 298 # file if this is the case). 299 try: 300 fpin.seek(-sizeEndCentDir, 2) 301 except OSError: 302 return None 303 data = fpin.read() 304 if (len(data) == sizeEndCentDir and 305 data[0:4] == stringEndArchive and 306 data[-2:] == b"\000\000"): 307 # the signature is correct and there's no comment, unpack structure 308 endrec = struct.unpack(structEndArchive, data) 309 endrec=list(endrec) 310 311 # Append a blank comment and record start offset 312 endrec.append(b"") 313 endrec.append(filesize - sizeEndCentDir) 314 315 # Try to read the "Zip64 end of central directory" structure 316 return _EndRecData64(fpin, -sizeEndCentDir, endrec) 317 318 # Either this is not a ZIP file, or it is a ZIP file with an archive 319 # comment. Search the end of the file for the "end of central directory" 320 # record signature. The comment is the last item in the ZIP file and may be 321 # up to 64K long. It is assumed that the "end of central directory" magic 322 # number does not appear in the comment. 323 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0) 324 fpin.seek(maxCommentStart, 0) 325 data = fpin.read() 326 start = data.rfind(stringEndArchive) 327 if start >= 0: 328 # found the magic number; attempt to unpack and interpret 329 recData = data[start:start+sizeEndCentDir] 330 if len(recData) != sizeEndCentDir: 331 # Zip file is corrupted. 332 return None 333 endrec = list(struct.unpack(structEndArchive, recData)) 334 commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file 335 comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize] 336 endrec.append(comment) 337 endrec.append(maxCommentStart + start) 338 339 # Try to read the "Zip64 end of central directory" structure 340 return _EndRecData64(fpin, maxCommentStart + start - filesize, 341 endrec) 342 343 # Unable to find a valid end of central directory structure 344 return None 345 346 347class ZipInfo (object): 348 """Class with attributes describing each file in the ZIP archive.""" 349 350 __slots__ = ( 351 'orig_filename', 352 'filename', 353 'date_time', 354 'compress_type', 355 '_compresslevel', 356 'comment', 357 'extra', 358 'create_system', 359 'create_version', 360 'extract_version', 361 'reserved', 362 'flag_bits', 363 'volume', 364 'internal_attr', 365 'external_attr', 366 'header_offset', 367 'CRC', 368 'compress_size', 369 'file_size', 370 '_raw_time', 371 '_end_offset', 372 ) 373 374 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): 375 self.orig_filename = filename # Original file name in archive 376 377 # Terminate the file name at the first null byte. Null bytes in file 378 # names are used as tricks by viruses in archives. 379 null_byte = filename.find(chr(0)) 380 if null_byte >= 0: 381 filename = filename[0:null_byte] 382 # This is used to ensure paths in generated ZIP files always use 383 # forward slashes as the directory separator, as required by the 384 # ZIP format specification. 385 if os.sep != "/" and os.sep in filename: 386 filename = filename.replace(os.sep, "/") 387 388 self.filename = filename # Normalized file name 389 self.date_time = date_time # year, month, day, hour, min, sec 390 391 if date_time[0] < 1980: 392 raise ValueError('ZIP does not support timestamps before 1980') 393 394 # Standard values: 395 self.compress_type = ZIP_STORED # Type of compression for the file 396 self._compresslevel = None # Level for the compressor 397 self.comment = b"" # Comment for each file 398 self.extra = b"" # ZIP extra data 399 if sys.platform == 'win32': 400 self.create_system = 0 # System which created ZIP archive 401 else: 402 # Assume everything else is unix-y 403 self.create_system = 3 # System which created ZIP archive 404 self.create_version = DEFAULT_VERSION # Version which created ZIP archive 405 self.extract_version = DEFAULT_VERSION # Version needed to extract archive 406 self.reserved = 0 # Must be zero 407 self.flag_bits = 0 # ZIP flag bits 408 self.volume = 0 # Volume number of file header 409 self.internal_attr = 0 # Internal attributes 410 self.external_attr = 0 # External file attributes 411 self.compress_size = 0 # Size of the compressed file 412 self.file_size = 0 # Size of the uncompressed file 413 self._end_offset = None # Start of the next local header or central directory 414 # Other attributes are set by class ZipFile: 415 # header_offset Byte offset to the file header 416 # CRC CRC-32 of the uncompressed file 417 418 def __repr__(self): 419 result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)] 420 if self.compress_type != ZIP_STORED: 421 result.append(' compress_type=%s' % 422 compressor_names.get(self.compress_type, 423 self.compress_type)) 424 hi = self.external_attr >> 16 425 lo = self.external_attr & 0xFFFF 426 if hi: 427 result.append(' filemode=%r' % stat.filemode(hi)) 428 if lo: 429 result.append(' external_attr=%#x' % lo) 430 isdir = self.is_dir() 431 if not isdir or self.file_size: 432 result.append(' file_size=%r' % self.file_size) 433 if ((not isdir or self.compress_size) and 434 (self.compress_type != ZIP_STORED or 435 self.file_size != self.compress_size)): 436 result.append(' compress_size=%r' % self.compress_size) 437 result.append('>') 438 return ''.join(result) 439 440 def FileHeader(self, zip64=None): 441 """Return the per-file header as a bytes object. 442 443 When the optional zip64 arg is None rather than a bool, we will 444 decide based upon the file_size and compress_size, if known, 445 False otherwise. 446 """ 447 dt = self.date_time 448 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 449 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 450 if self.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 451 # Set these to zero because we write them after the file data 452 CRC = compress_size = file_size = 0 453 else: 454 CRC = self.CRC 455 compress_size = self.compress_size 456 file_size = self.file_size 457 458 extra = self.extra 459 460 min_version = 0 461 if zip64 is None: 462 # We always explicitly pass zip64 within this module.... This 463 # remains for anyone using ZipInfo.FileHeader as a public API. 464 zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT 465 if zip64: 466 fmt = '<HHQQ' 467 extra = extra + struct.pack(fmt, 468 1, struct.calcsize(fmt)-4, file_size, compress_size) 469 file_size = 0xffffffff 470 compress_size = 0xffffffff 471 min_version = ZIP64_VERSION 472 473 if self.compress_type == ZIP_BZIP2: 474 min_version = max(BZIP2_VERSION, min_version) 475 elif self.compress_type == ZIP_LZMA: 476 min_version = max(LZMA_VERSION, min_version) 477 478 self.extract_version = max(min_version, self.extract_version) 479 self.create_version = max(min_version, self.create_version) 480 filename, flag_bits = self._encodeFilenameFlags() 481 header = struct.pack(structFileHeader, stringFileHeader, 482 self.extract_version, self.reserved, flag_bits, 483 self.compress_type, dostime, dosdate, CRC, 484 compress_size, file_size, 485 len(filename), len(extra)) 486 return header + filename + extra 487 488 def _encodeFilenameFlags(self): 489 try: 490 return self.filename.encode('ascii'), self.flag_bits 491 except UnicodeEncodeError: 492 return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME 493 494 def _decodeExtra(self): 495 # Try to decode the extra field. 496 extra = self.extra 497 unpack = struct.unpack 498 while len(extra) >= 4: 499 tp, ln = unpack('<HH', extra[:4]) 500 if ln+4 > len(extra): 501 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) 502 if tp == 0x0001: 503 data = extra[4:ln+4] 504 # ZIP64 extension (large files and/or large archives) 505 try: 506 if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF): 507 field = "File size" 508 self.file_size, = unpack('<Q', data[:8]) 509 data = data[8:] 510 if self.compress_size == 0xFFFF_FFFF: 511 field = "Compress size" 512 self.compress_size, = unpack('<Q', data[:8]) 513 data = data[8:] 514 if self.header_offset == 0xFFFF_FFFF: 515 field = "Header offset" 516 self.header_offset, = unpack('<Q', data[:8]) 517 except struct.error: 518 raise BadZipFile(f"Corrupt zip64 extra field. " 519 f"{field} not found.") from None 520 521 extra = extra[ln+4:] 522 523 @classmethod 524 def from_file(cls, filename, arcname=None, *, strict_timestamps=True): 525 """Construct an appropriate ZipInfo for a file on the filesystem. 526 527 filename should be the path to a file or directory on the filesystem. 528 529 arcname is the name which it will have within the archive (by default, 530 this will be the same as filename, but without a drive letter and with 531 leading path separators removed). 532 """ 533 if isinstance(filename, os.PathLike): 534 filename = os.fspath(filename) 535 st = os.stat(filename) 536 isdir = stat.S_ISDIR(st.st_mode) 537 mtime = time.localtime(st.st_mtime) 538 date_time = mtime[0:6] 539 if not strict_timestamps and date_time[0] < 1980: 540 date_time = (1980, 1, 1, 0, 0, 0) 541 elif not strict_timestamps and date_time[0] > 2107: 542 date_time = (2107, 12, 31, 23, 59, 59) 543 # Create ZipInfo instance to store file information 544 if arcname is None: 545 arcname = filename 546 arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) 547 while arcname[0] in (os.sep, os.altsep): 548 arcname = arcname[1:] 549 if isdir: 550 arcname += '/' 551 zinfo = cls(arcname, date_time) 552 zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes 553 if isdir: 554 zinfo.file_size = 0 555 zinfo.external_attr |= 0x10 # MS-DOS directory flag 556 else: 557 zinfo.file_size = st.st_size 558 559 return zinfo 560 561 def is_dir(self): 562 """Return True if this archive member is a directory.""" 563 return self.filename[-1] == '/' 564 565 566# ZIP encryption uses the CRC32 one-byte primitive for scrambling some 567# internal keys. We noticed that a direct implementation is faster than 568# relying on binascii.crc32(). 569 570_crctable = None 571def _gen_crc(crc): 572 for j in range(8): 573 if crc & 1: 574 crc = (crc >> 1) ^ 0xEDB88320 575 else: 576 crc >>= 1 577 return crc 578 579# ZIP supports a password-based form of encryption. Even though known 580# plaintext attacks have been found against it, it is still useful 581# to be able to get data out of such a file. 582# 583# Usage: 584# zd = _ZipDecrypter(mypwd) 585# plain_bytes = zd(cypher_bytes) 586 587def _ZipDecrypter(pwd): 588 key0 = 305419896 589 key1 = 591751049 590 key2 = 878082192 591 592 global _crctable 593 if _crctable is None: 594 _crctable = list(map(_gen_crc, range(256))) 595 crctable = _crctable 596 597 def crc32(ch, crc): 598 """Compute the CRC32 primitive on one byte.""" 599 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF] 600 601 def update_keys(c): 602 nonlocal key0, key1, key2 603 key0 = crc32(c, key0) 604 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF 605 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF 606 key2 = crc32(key1 >> 24, key2) 607 608 for p in pwd: 609 update_keys(p) 610 611 def decrypter(data): 612 """Decrypt a bytes object.""" 613 result = bytearray() 614 append = result.append 615 for c in data: 616 k = key2 | 2 617 c ^= ((k * (k^1)) >> 8) & 0xFF 618 update_keys(c) 619 append(c) 620 return bytes(result) 621 622 return decrypter 623 624 625class LZMACompressor: 626 627 def __init__(self): 628 self._comp = None 629 630 def _init(self): 631 props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1}) 632 self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[ 633 lzma._decode_filter_properties(lzma.FILTER_LZMA1, props) 634 ]) 635 return struct.pack('<BBH', 9, 4, len(props)) + props 636 637 def compress(self, data): 638 if self._comp is None: 639 return self._init() + self._comp.compress(data) 640 return self._comp.compress(data) 641 642 def flush(self): 643 if self._comp is None: 644 return self._init() + self._comp.flush() 645 return self._comp.flush() 646 647 648class LZMADecompressor: 649 650 def __init__(self): 651 self._decomp = None 652 self._unconsumed = b'' 653 self.eof = False 654 655 def decompress(self, data): 656 if self._decomp is None: 657 self._unconsumed += data 658 if len(self._unconsumed) <= 4: 659 return b'' 660 psize, = struct.unpack('<H', self._unconsumed[2:4]) 661 if len(self._unconsumed) <= 4 + psize: 662 return b'' 663 664 self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[ 665 lzma._decode_filter_properties(lzma.FILTER_LZMA1, 666 self._unconsumed[4:4 + psize]) 667 ]) 668 data = self._unconsumed[4 + psize:] 669 del self._unconsumed 670 671 result = self._decomp.decompress(data) 672 self.eof = self._decomp.eof 673 return result 674 675 676compressor_names = { 677 0: 'store', 678 1: 'shrink', 679 2: 'reduce', 680 3: 'reduce', 681 4: 'reduce', 682 5: 'reduce', 683 6: 'implode', 684 7: 'tokenize', 685 8: 'deflate', 686 9: 'deflate64', 687 10: 'implode', 688 12: 'bzip2', 689 14: 'lzma', 690 18: 'terse', 691 19: 'lz77', 692 97: 'wavpack', 693 98: 'ppmd', 694} 695 696def _check_compression(compression): 697 if compression == ZIP_STORED: 698 pass 699 elif compression == ZIP_DEFLATED: 700 if not zlib: 701 raise RuntimeError( 702 "Compression requires the (missing) zlib module") 703 elif compression == ZIP_BZIP2: 704 if not bz2: 705 raise RuntimeError( 706 "Compression requires the (missing) bz2 module") 707 elif compression == ZIP_LZMA: 708 if not lzma: 709 raise RuntimeError( 710 "Compression requires the (missing) lzma module") 711 else: 712 raise NotImplementedError("That compression method is not supported") 713 714 715def _get_compressor(compress_type, compresslevel=None): 716 if compress_type == ZIP_DEFLATED: 717 if compresslevel is not None: 718 return zlib.compressobj(compresslevel, zlib.DEFLATED, -15) 719 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15) 720 elif compress_type == ZIP_BZIP2: 721 if compresslevel is not None: 722 return bz2.BZ2Compressor(compresslevel) 723 return bz2.BZ2Compressor() 724 # compresslevel is ignored for ZIP_LZMA 725 elif compress_type == ZIP_LZMA: 726 return LZMACompressor() 727 else: 728 return None 729 730 731def _get_decompressor(compress_type): 732 _check_compression(compress_type) 733 if compress_type == ZIP_STORED: 734 return None 735 elif compress_type == ZIP_DEFLATED: 736 return zlib.decompressobj(-15) 737 elif compress_type == ZIP_BZIP2: 738 return bz2.BZ2Decompressor() 739 elif compress_type == ZIP_LZMA: 740 return LZMADecompressor() 741 else: 742 descr = compressor_names.get(compress_type) 743 if descr: 744 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr)) 745 else: 746 raise NotImplementedError("compression type %d" % (compress_type,)) 747 748 749class _SharedFile: 750 def __init__(self, file, pos, close, lock, writing): 751 self._file = file 752 self._pos = pos 753 self._close = close 754 self._lock = lock 755 self._writing = writing 756 self.seekable = file.seekable 757 758 def tell(self): 759 return self._pos 760 761 def seek(self, offset, whence=0): 762 with self._lock: 763 if self._writing(): 764 raise ValueError("Can't reposition in the ZIP file while " 765 "there is an open writing handle on it. " 766 "Close the writing handle before trying to read.") 767 self._file.seek(offset, whence) 768 self._pos = self._file.tell() 769 return self._pos 770 771 def read(self, n=-1): 772 with self._lock: 773 if self._writing(): 774 raise ValueError("Can't read from the ZIP file while there " 775 "is an open writing handle on it. " 776 "Close the writing handle before trying to read.") 777 self._file.seek(self._pos) 778 data = self._file.read(n) 779 self._pos = self._file.tell() 780 return data 781 782 def close(self): 783 if self._file is not None: 784 fileobj = self._file 785 self._file = None 786 self._close(fileobj) 787 788# Provide the tell method for unseekable stream 789class _Tellable: 790 def __init__(self, fp): 791 self.fp = fp 792 self.offset = 0 793 794 def write(self, data): 795 n = self.fp.write(data) 796 self.offset += n 797 return n 798 799 def tell(self): 800 return self.offset 801 802 def flush(self): 803 self.fp.flush() 804 805 def close(self): 806 self.fp.close() 807 808 809class ZipExtFile(io.BufferedIOBase): 810 """File-like object for reading an archive member. 811 Is returned by ZipFile.open(). 812 """ 813 814 # Max size supported by decompressor. 815 MAX_N = 1 << 31 - 1 816 817 # Read from compressed files in 4k blocks. 818 MIN_READ_SIZE = 4096 819 820 # Chunk size to read during seek 821 MAX_SEEK_READ = 1 << 24 822 823 def __init__(self, fileobj, mode, zipinfo, pwd=None, 824 close_fileobj=False): 825 self._fileobj = fileobj 826 self._pwd = pwd 827 self._close_fileobj = close_fileobj 828 829 self._compress_type = zipinfo.compress_type 830 self._compress_left = zipinfo.compress_size 831 self._left = zipinfo.file_size 832 833 self._decompressor = _get_decompressor(self._compress_type) 834 835 self._eof = False 836 self._readbuffer = b'' 837 self._offset = 0 838 839 self.newlines = None 840 841 self.mode = mode 842 self.name = zipinfo.filename 843 844 if hasattr(zipinfo, 'CRC'): 845 self._expected_crc = zipinfo.CRC 846 self._running_crc = crc32(b'') 847 else: 848 self._expected_crc = None 849 850 self._seekable = False 851 try: 852 if fileobj.seekable(): 853 self._orig_compress_start = fileobj.tell() 854 self._orig_compress_size = zipinfo.compress_size 855 self._orig_file_size = zipinfo.file_size 856 self._orig_start_crc = self._running_crc 857 self._seekable = True 858 except AttributeError: 859 pass 860 861 self._decrypter = None 862 if pwd: 863 if zipinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 864 # compare against the file type from extended local headers 865 check_byte = (zipinfo._raw_time >> 8) & 0xff 866 else: 867 # compare against the CRC otherwise 868 check_byte = (zipinfo.CRC >> 24) & 0xff 869 h = self._init_decrypter() 870 if h != check_byte: 871 raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename) 872 873 874 def _init_decrypter(self): 875 self._decrypter = _ZipDecrypter(self._pwd) 876 # The first 12 bytes in the cypher stream is an encryption header 877 # used to strengthen the algorithm. The first 11 bytes are 878 # completely random, while the 12th contains the MSB of the CRC, 879 # or the MSB of the file time depending on the header type 880 # and is used to check the correctness of the password. 881 header = self._fileobj.read(12) 882 self._compress_left -= 12 883 return self._decrypter(header)[11] 884 885 def __repr__(self): 886 result = ['<%s.%s' % (self.__class__.__module__, 887 self.__class__.__qualname__)] 888 if not self.closed: 889 result.append(' name=%r mode=%r' % (self.name, self.mode)) 890 if self._compress_type != ZIP_STORED: 891 result.append(' compress_type=%s' % 892 compressor_names.get(self._compress_type, 893 self._compress_type)) 894 else: 895 result.append(' [closed]') 896 result.append('>') 897 return ''.join(result) 898 899 def readline(self, limit=-1): 900 """Read and return a line from the stream. 901 902 If limit is specified, at most limit bytes will be read. 903 """ 904 905 if limit < 0: 906 # Shortcut common case - newline found in buffer. 907 i = self._readbuffer.find(b'\n', self._offset) + 1 908 if i > 0: 909 line = self._readbuffer[self._offset: i] 910 self._offset = i 911 return line 912 913 return io.BufferedIOBase.readline(self, limit) 914 915 def peek(self, n=1): 916 """Returns buffered bytes without advancing the position.""" 917 if n > len(self._readbuffer) - self._offset: 918 chunk = self.read(n) 919 if len(chunk) > self._offset: 920 self._readbuffer = chunk + self._readbuffer[self._offset:] 921 self._offset = 0 922 else: 923 self._offset -= len(chunk) 924 925 # Return up to 512 bytes to reduce allocation overhead for tight loops. 926 return self._readbuffer[self._offset: self._offset + 512] 927 928 def readable(self): 929 if self.closed: 930 raise ValueError("I/O operation on closed file.") 931 return True 932 933 def read(self, n=-1): 934 """Read and return up to n bytes. 935 If the argument is omitted, None, or negative, data is read and returned until EOF is reached. 936 """ 937 if self.closed: 938 raise ValueError("read from closed file.") 939 if n is None or n < 0: 940 buf = self._readbuffer[self._offset:] 941 self._readbuffer = b'' 942 self._offset = 0 943 while not self._eof: 944 buf += self._read1(self.MAX_N) 945 return buf 946 947 end = n + self._offset 948 if end < len(self._readbuffer): 949 buf = self._readbuffer[self._offset:end] 950 self._offset = end 951 return buf 952 953 n = end - len(self._readbuffer) 954 buf = self._readbuffer[self._offset:] 955 self._readbuffer = b'' 956 self._offset = 0 957 while n > 0 and not self._eof: 958 data = self._read1(n) 959 if n < len(data): 960 self._readbuffer = data 961 self._offset = n 962 buf += data[:n] 963 break 964 buf += data 965 n -= len(data) 966 return buf 967 968 def _update_crc(self, newdata): 969 # Update the CRC using the given data. 970 if self._expected_crc is None: 971 # No need to compute the CRC if we don't have a reference value 972 return 973 self._running_crc = crc32(newdata, self._running_crc) 974 # Check the CRC if we're at the end of the file 975 if self._eof and self._running_crc != self._expected_crc: 976 raise BadZipFile("Bad CRC-32 for file %r" % self.name) 977 978 def read1(self, n): 979 """Read up to n bytes with at most one read() system call.""" 980 981 if n is None or n < 0: 982 buf = self._readbuffer[self._offset:] 983 self._readbuffer = b'' 984 self._offset = 0 985 while not self._eof: 986 data = self._read1(self.MAX_N) 987 if data: 988 buf += data 989 break 990 return buf 991 992 end = n + self._offset 993 if end < len(self._readbuffer): 994 buf = self._readbuffer[self._offset:end] 995 self._offset = end 996 return buf 997 998 n = end - len(self._readbuffer) 999 buf = self._readbuffer[self._offset:] 1000 self._readbuffer = b'' 1001 self._offset = 0 1002 if n > 0: 1003 while not self._eof: 1004 data = self._read1(n) 1005 if n < len(data): 1006 self._readbuffer = data 1007 self._offset = n 1008 buf += data[:n] 1009 break 1010 if data: 1011 buf += data 1012 break 1013 return buf 1014 1015 def _read1(self, n): 1016 # Read up to n compressed bytes with at most one read() system call, 1017 # decrypt and decompress them. 1018 if self._eof or n <= 0: 1019 return b'' 1020 1021 # Read from file. 1022 if self._compress_type == ZIP_DEFLATED: 1023 ## Handle unconsumed data. 1024 data = self._decompressor.unconsumed_tail 1025 if n > len(data): 1026 data += self._read2(n - len(data)) 1027 else: 1028 data = self._read2(n) 1029 1030 if self._compress_type == ZIP_STORED: 1031 self._eof = self._compress_left <= 0 1032 elif self._compress_type == ZIP_DEFLATED: 1033 n = max(n, self.MIN_READ_SIZE) 1034 data = self._decompressor.decompress(data, n) 1035 self._eof = (self._decompressor.eof or 1036 self._compress_left <= 0 and 1037 not self._decompressor.unconsumed_tail) 1038 if self._eof: 1039 data += self._decompressor.flush() 1040 else: 1041 data = self._decompressor.decompress(data) 1042 self._eof = self._decompressor.eof or self._compress_left <= 0 1043 1044 data = data[:self._left] 1045 self._left -= len(data) 1046 if self._left <= 0: 1047 self._eof = True 1048 self._update_crc(data) 1049 return data 1050 1051 def _read2(self, n): 1052 if self._compress_left <= 0: 1053 return b'' 1054 1055 n = max(n, self.MIN_READ_SIZE) 1056 n = min(n, self._compress_left) 1057 1058 data = self._fileobj.read(n) 1059 self._compress_left -= len(data) 1060 if not data: 1061 raise EOFError 1062 1063 if self._decrypter is not None: 1064 data = self._decrypter(data) 1065 return data 1066 1067 def close(self): 1068 try: 1069 if self._close_fileobj: 1070 self._fileobj.close() 1071 finally: 1072 super().close() 1073 1074 def seekable(self): 1075 if self.closed: 1076 raise ValueError("I/O operation on closed file.") 1077 return self._seekable 1078 1079 def seek(self, offset, whence=0): 1080 if self.closed: 1081 raise ValueError("seek on closed file.") 1082 if not self._seekable: 1083 raise io.UnsupportedOperation("underlying stream is not seekable") 1084 curr_pos = self.tell() 1085 if whence == 0: # Seek from start of file 1086 new_pos = offset 1087 elif whence == 1: # Seek from current position 1088 new_pos = curr_pos + offset 1089 elif whence == 2: # Seek from EOF 1090 new_pos = self._orig_file_size + offset 1091 else: 1092 raise ValueError("whence must be os.SEEK_SET (0), " 1093 "os.SEEK_CUR (1), or os.SEEK_END (2)") 1094 1095 if new_pos > self._orig_file_size: 1096 new_pos = self._orig_file_size 1097 1098 if new_pos < 0: 1099 new_pos = 0 1100 1101 read_offset = new_pos - curr_pos 1102 buff_offset = read_offset + self._offset 1103 1104 if buff_offset >= 0 and buff_offset < len(self._readbuffer): 1105 # Just move the _offset index if the new position is in the _readbuffer 1106 self._offset = buff_offset 1107 read_offset = 0 1108 elif read_offset < 0: 1109 # Position is before the current position. Reset the ZipExtFile 1110 self._fileobj.seek(self._orig_compress_start) 1111 self._running_crc = self._orig_start_crc 1112 self._compress_left = self._orig_compress_size 1113 self._left = self._orig_file_size 1114 self._readbuffer = b'' 1115 self._offset = 0 1116 self._decompressor = _get_decompressor(self._compress_type) 1117 self._eof = False 1118 read_offset = new_pos 1119 if self._decrypter is not None: 1120 self._init_decrypter() 1121 1122 while read_offset > 0: 1123 read_len = min(self.MAX_SEEK_READ, read_offset) 1124 self.read(read_len) 1125 read_offset -= read_len 1126 1127 return self.tell() 1128 1129 def tell(self): 1130 if self.closed: 1131 raise ValueError("tell on closed file.") 1132 if not self._seekable: 1133 raise io.UnsupportedOperation("underlying stream is not seekable") 1134 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset 1135 return filepos 1136 1137 1138class _ZipWriteFile(io.BufferedIOBase): 1139 def __init__(self, zf, zinfo, zip64): 1140 self._zinfo = zinfo 1141 self._zip64 = zip64 1142 self._zipfile = zf 1143 self._compressor = _get_compressor(zinfo.compress_type, 1144 zinfo._compresslevel) 1145 self._file_size = 0 1146 self._compress_size = 0 1147 self._crc = 0 1148 1149 @property 1150 def _fileobj(self): 1151 return self._zipfile.fp 1152 1153 def writable(self): 1154 return True 1155 1156 def write(self, data): 1157 if self.closed: 1158 raise ValueError('I/O operation on closed file.') 1159 1160 # Accept any data that supports the buffer protocol 1161 if isinstance(data, (bytes, bytearray)): 1162 nbytes = len(data) 1163 else: 1164 data = memoryview(data) 1165 nbytes = data.nbytes 1166 self._file_size += nbytes 1167 1168 self._crc = crc32(data, self._crc) 1169 if self._compressor: 1170 data = self._compressor.compress(data) 1171 self._compress_size += len(data) 1172 self._fileobj.write(data) 1173 return nbytes 1174 1175 def close(self): 1176 if self.closed: 1177 return 1178 try: 1179 super().close() 1180 # Flush any data from the compressor, and update header info 1181 if self._compressor: 1182 buf = self._compressor.flush() 1183 self._compress_size += len(buf) 1184 self._fileobj.write(buf) 1185 self._zinfo.compress_size = self._compress_size 1186 else: 1187 self._zinfo.compress_size = self._file_size 1188 self._zinfo.CRC = self._crc 1189 self._zinfo.file_size = self._file_size 1190 1191 if not self._zip64: 1192 if self._file_size > ZIP64_LIMIT: 1193 raise RuntimeError("File size too large, try using force_zip64") 1194 if self._compress_size > ZIP64_LIMIT: 1195 raise RuntimeError("Compressed size too large, try using force_zip64") 1196 1197 # Write updated header info 1198 if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 1199 # Write CRC and file sizes after the file data 1200 fmt = '<LLQQ' if self._zip64 else '<LLLL' 1201 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC, 1202 self._zinfo.compress_size, self._zinfo.file_size)) 1203 self._zipfile.start_dir = self._fileobj.tell() 1204 else: 1205 # Seek backwards and write file header (which will now include 1206 # correct CRC and file sizes) 1207 1208 # Preserve current position in file 1209 self._zipfile.start_dir = self._fileobj.tell() 1210 self._fileobj.seek(self._zinfo.header_offset) 1211 self._fileobj.write(self._zinfo.FileHeader(self._zip64)) 1212 self._fileobj.seek(self._zipfile.start_dir) 1213 1214 # Successfully written: Add file to our caches 1215 self._zipfile.filelist.append(self._zinfo) 1216 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo 1217 finally: 1218 self._zipfile._writing = False 1219 1220 1221 1222class ZipFile: 1223 """ Class with methods to open, read, write, close, list zip files. 1224 1225 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True, 1226 compresslevel=None) 1227 1228 file: Either the path to the file, or a file-like object. 1229 If it is a path, the file will be opened and closed by ZipFile. 1230 mode: The mode can be either read 'r', write 'w', exclusive create 'x', 1231 or append 'a'. 1232 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), 1233 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). 1234 allowZip64: if True ZipFile will create files with ZIP64 extensions when 1235 needed, otherwise it will raise an exception when this would 1236 be necessary. 1237 compresslevel: None (default for the given compression type) or an integer 1238 specifying the level to pass to the compressor. 1239 When using ZIP_STORED or ZIP_LZMA this keyword has no effect. 1240 When using ZIP_DEFLATED integers 0 through 9 are accepted. 1241 When using ZIP_BZIP2 integers 1 through 9 are accepted. 1242 1243 """ 1244 1245 fp = None # Set here since __del__ checks it 1246 _windows_illegal_name_trans_table = None 1247 1248 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, 1249 compresslevel=None, *, strict_timestamps=True, metadata_encoding=None): 1250 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x', 1251 or append 'a'.""" 1252 if mode not in ('r', 'w', 'x', 'a'): 1253 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'") 1254 1255 _check_compression(compression) 1256 1257 self._allowZip64 = allowZip64 1258 self._didModify = False 1259 self.debug = 0 # Level of printing: 0 through 3 1260 self.NameToInfo = {} # Find file info given name 1261 self.filelist = [] # List of ZipInfo instances for archive 1262 self.compression = compression # Method of compression 1263 self.compresslevel = compresslevel 1264 self.mode = mode 1265 self.pwd = None 1266 self._comment = b'' 1267 self._strict_timestamps = strict_timestamps 1268 self.metadata_encoding = metadata_encoding 1269 1270 # Check that we don't try to write with nonconforming codecs 1271 if self.metadata_encoding and mode != 'r': 1272 raise ValueError( 1273 "metadata_encoding is only supported for reading files") 1274 1275 # Check if we were passed a file-like object 1276 if isinstance(file, os.PathLike): 1277 file = os.fspath(file) 1278 if isinstance(file, str): 1279 # No, it's a filename 1280 self._filePassed = 0 1281 self.filename = file 1282 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b', 1283 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'} 1284 filemode = modeDict[mode] 1285 while True: 1286 try: 1287 self.fp = io.open(file, filemode) 1288 except OSError: 1289 if filemode in modeDict: 1290 filemode = modeDict[filemode] 1291 continue 1292 raise 1293 break 1294 else: 1295 self._filePassed = 1 1296 self.fp = file 1297 self.filename = getattr(file, 'name', None) 1298 self._fileRefCnt = 1 1299 self._lock = threading.RLock() 1300 self._seekable = True 1301 self._writing = False 1302 1303 try: 1304 if mode == 'r': 1305 self._RealGetContents() 1306 elif mode in ('w', 'x'): 1307 # set the modified flag so central directory gets written 1308 # even if no files are added to the archive 1309 self._didModify = True 1310 try: 1311 self.start_dir = self.fp.tell() 1312 except (AttributeError, OSError): 1313 self.fp = _Tellable(self.fp) 1314 self.start_dir = 0 1315 self._seekable = False 1316 else: 1317 # Some file-like objects can provide tell() but not seek() 1318 try: 1319 self.fp.seek(self.start_dir) 1320 except (AttributeError, OSError): 1321 self._seekable = False 1322 elif mode == 'a': 1323 try: 1324 # See if file is a zip file 1325 self._RealGetContents() 1326 # seek to start of directory and overwrite 1327 self.fp.seek(self.start_dir) 1328 except BadZipFile: 1329 # file is not a zip file, just append 1330 self.fp.seek(0, 2) 1331 1332 # set the modified flag so central directory gets written 1333 # even if no files are added to the archive 1334 self._didModify = True 1335 self.start_dir = self.fp.tell() 1336 else: 1337 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'") 1338 except: 1339 fp = self.fp 1340 self.fp = None 1341 self._fpclose(fp) 1342 raise 1343 1344 def __enter__(self): 1345 return self 1346 1347 def __exit__(self, type, value, traceback): 1348 self.close() 1349 1350 def __repr__(self): 1351 result = ['<%s.%s' % (self.__class__.__module__, 1352 self.__class__.__qualname__)] 1353 if self.fp is not None: 1354 if self._filePassed: 1355 result.append(' file=%r' % self.fp) 1356 elif self.filename is not None: 1357 result.append(' filename=%r' % self.filename) 1358 result.append(' mode=%r' % self.mode) 1359 else: 1360 result.append(' [closed]') 1361 result.append('>') 1362 return ''.join(result) 1363 1364 def _RealGetContents(self): 1365 """Read in the table of contents for the ZIP file.""" 1366 fp = self.fp 1367 try: 1368 endrec = _EndRecData(fp) 1369 except OSError: 1370 raise BadZipFile("File is not a zip file") 1371 if not endrec: 1372 raise BadZipFile("File is not a zip file") 1373 if self.debug > 1: 1374 print(endrec) 1375 size_cd = endrec[_ECD_SIZE] # bytes in central directory 1376 offset_cd = endrec[_ECD_OFFSET] # offset of central directory 1377 self._comment = endrec[_ECD_COMMENT] # archive comment 1378 1379 # "concat" is zero, unless zip was concatenated to another file 1380 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd 1381 if endrec[_ECD_SIGNATURE] == stringEndArchive64: 1382 # If Zip64 extension structures are present, account for them 1383 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) 1384 1385 if self.debug > 2: 1386 inferred = concat + offset_cd 1387 print("given, inferred, offset", offset_cd, inferred, concat) 1388 # self.start_dir: Position of start of central directory 1389 self.start_dir = offset_cd + concat 1390 if self.start_dir < 0: 1391 raise BadZipFile("Bad offset for central directory") 1392 fp.seek(self.start_dir, 0) 1393 data = fp.read(size_cd) 1394 fp = io.BytesIO(data) 1395 total = 0 1396 while total < size_cd: 1397 centdir = fp.read(sizeCentralDir) 1398 if len(centdir) != sizeCentralDir: 1399 raise BadZipFile("Truncated central directory") 1400 centdir = struct.unpack(structCentralDir, centdir) 1401 if centdir[_CD_SIGNATURE] != stringCentralDir: 1402 raise BadZipFile("Bad magic number for central directory") 1403 if self.debug > 2: 1404 print(centdir) 1405 filename = fp.read(centdir[_CD_FILENAME_LENGTH]) 1406 flags = centdir[_CD_FLAG_BITS] 1407 if flags & _MASK_UTF_FILENAME: 1408 # UTF-8 file names extension 1409 filename = filename.decode('utf-8') 1410 else: 1411 # Historical ZIP filename encoding 1412 filename = filename.decode(self.metadata_encoding or 'cp437') 1413 # Create ZipInfo instance to store file information 1414 x = ZipInfo(filename) 1415 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) 1416 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) 1417 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] 1418 (x.create_version, x.create_system, x.extract_version, x.reserved, 1419 x.flag_bits, x.compress_type, t, d, 1420 x.CRC, x.compress_size, x.file_size) = centdir[1:12] 1421 if x.extract_version > MAX_EXTRACT_VERSION: 1422 raise NotImplementedError("zip file version %.1f" % 1423 (x.extract_version / 10)) 1424 x.volume, x.internal_attr, x.external_attr = centdir[15:18] 1425 # Convert date/time code to (year, month, day, hour, min, sec) 1426 x._raw_time = t 1427 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, 1428 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) 1429 1430 x._decodeExtra() 1431 x.header_offset = x.header_offset + concat 1432 self.filelist.append(x) 1433 self.NameToInfo[x.filename] = x 1434 1435 # update total bytes read from central directory 1436 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] 1437 + centdir[_CD_EXTRA_FIELD_LENGTH] 1438 + centdir[_CD_COMMENT_LENGTH]) 1439 1440 if self.debug > 2: 1441 print("total", total) 1442 1443 end_offset = self.start_dir 1444 for zinfo in sorted(self.filelist, 1445 key=lambda zinfo: zinfo.header_offset, 1446 reverse=True): 1447 zinfo._end_offset = end_offset 1448 end_offset = zinfo.header_offset 1449 1450 def namelist(self): 1451 """Return a list of file names in the archive.""" 1452 return [data.filename for data in self.filelist] 1453 1454 def infolist(self): 1455 """Return a list of class ZipInfo instances for files in the 1456 archive.""" 1457 return self.filelist 1458 1459 def printdir(self, file=None): 1460 """Print a table of contents for the zip file.""" 1461 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"), 1462 file=file) 1463 for zinfo in self.filelist: 1464 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] 1465 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size), 1466 file=file) 1467 1468 def testzip(self): 1469 """Read all the files and check the CRC.""" 1470 chunk_size = 2 ** 20 1471 for zinfo in self.filelist: 1472 try: 1473 # Read by chunks, to avoid an OverflowError or a 1474 # MemoryError with very large embedded files. 1475 with self.open(zinfo.filename, "r") as f: 1476 while f.read(chunk_size): # Check CRC-32 1477 pass 1478 except BadZipFile: 1479 return zinfo.filename 1480 1481 def getinfo(self, name): 1482 """Return the instance of ZipInfo given 'name'.""" 1483 info = self.NameToInfo.get(name) 1484 if info is None: 1485 raise KeyError( 1486 'There is no item named %r in the archive' % name) 1487 1488 return info 1489 1490 def setpassword(self, pwd): 1491 """Set default password for encrypted files.""" 1492 if pwd and not isinstance(pwd, bytes): 1493 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1494 if pwd: 1495 self.pwd = pwd 1496 else: 1497 self.pwd = None 1498 1499 @property 1500 def comment(self): 1501 """The comment text associated with the ZIP file.""" 1502 return self._comment 1503 1504 @comment.setter 1505 def comment(self, comment): 1506 if not isinstance(comment, bytes): 1507 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__) 1508 # check for valid comment length 1509 if len(comment) > ZIP_MAX_COMMENT: 1510 import warnings 1511 warnings.warn('Archive comment is too long; truncating to %d bytes' 1512 % ZIP_MAX_COMMENT, stacklevel=2) 1513 comment = comment[:ZIP_MAX_COMMENT] 1514 self._comment = comment 1515 self._didModify = True 1516 1517 def read(self, name, pwd=None): 1518 """Return file bytes for name.""" 1519 with self.open(name, "r", pwd) as fp: 1520 return fp.read() 1521 1522 def open(self, name, mode="r", pwd=None, *, force_zip64=False): 1523 """Return file-like object for 'name'. 1524 1525 name is a string for the file name within the ZIP file, or a ZipInfo 1526 object. 1527 1528 mode should be 'r' to read a file already in the ZIP file, or 'w' to 1529 write to a file newly added to the archive. 1530 1531 pwd is the password to decrypt files (only used for reading). 1532 1533 When writing, if the file size is not known in advance but may exceed 1534 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large 1535 files. If the size is known in advance, it is best to pass a ZipInfo 1536 instance for name, with zinfo.file_size set. 1537 """ 1538 if mode not in {"r", "w"}: 1539 raise ValueError('open() requires mode "r" or "w"') 1540 if pwd and (mode == "w"): 1541 raise ValueError("pwd is only supported for reading files") 1542 if not self.fp: 1543 raise ValueError( 1544 "Attempt to use ZIP archive that was already closed") 1545 1546 # Make sure we have an info object 1547 if isinstance(name, ZipInfo): 1548 # 'name' is already an info object 1549 zinfo = name 1550 elif mode == 'w': 1551 zinfo = ZipInfo(name) 1552 zinfo.compress_type = self.compression 1553 zinfo._compresslevel = self.compresslevel 1554 else: 1555 # Get info object for name 1556 zinfo = self.getinfo(name) 1557 1558 if mode == 'w': 1559 return self._open_to_write(zinfo, force_zip64=force_zip64) 1560 1561 if self._writing: 1562 raise ValueError("Can't read from the ZIP file while there " 1563 "is an open writing handle on it. " 1564 "Close the writing handle before trying to read.") 1565 1566 # Open for reading: 1567 self._fileRefCnt += 1 1568 zef_file = _SharedFile(self.fp, zinfo.header_offset, 1569 self._fpclose, self._lock, lambda: self._writing) 1570 try: 1571 # Skip the file header: 1572 fheader = zef_file.read(sizeFileHeader) 1573 if len(fheader) != sizeFileHeader: 1574 raise BadZipFile("Truncated file header") 1575 fheader = struct.unpack(structFileHeader, fheader) 1576 if fheader[_FH_SIGNATURE] != stringFileHeader: 1577 raise BadZipFile("Bad magic number for file header") 1578 1579 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) 1580 if fheader[_FH_EXTRA_FIELD_LENGTH]: 1581 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) 1582 1583 if zinfo.flag_bits & _MASK_COMPRESSED_PATCH: 1584 # Zip 2.7: compressed patched data 1585 raise NotImplementedError("compressed patched data (flag bit 5)") 1586 1587 if zinfo.flag_bits & _MASK_STRONG_ENCRYPTION: 1588 # strong encryption 1589 raise NotImplementedError("strong encryption (flag bit 6)") 1590 1591 if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & _MASK_UTF_FILENAME: 1592 # UTF-8 filename 1593 fname_str = fname.decode("utf-8") 1594 else: 1595 fname_str = fname.decode(self.metadata_encoding or "cp437") 1596 1597 if fname_str != zinfo.orig_filename: 1598 raise BadZipFile( 1599 'File name in directory %r and header %r differ.' 1600 % (zinfo.orig_filename, fname)) 1601 1602 if (zinfo._end_offset is not None and 1603 zef_file.tell() + zinfo.compress_size > zinfo._end_offset): 1604 raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)") 1605 1606 # check for encrypted flag & handle password 1607 is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED 1608 if is_encrypted: 1609 if not pwd: 1610 pwd = self.pwd 1611 if pwd and not isinstance(pwd, bytes): 1612 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1613 if not pwd: 1614 raise RuntimeError("File %r is encrypted, password " 1615 "required for extraction" % name) 1616 else: 1617 pwd = None 1618 1619 return ZipExtFile(zef_file, mode, zinfo, pwd, True) 1620 except: 1621 zef_file.close() 1622 raise 1623 1624 def _open_to_write(self, zinfo, force_zip64=False): 1625 if force_zip64 and not self._allowZip64: 1626 raise ValueError( 1627 "force_zip64 is True, but allowZip64 was False when opening " 1628 "the ZIP file." 1629 ) 1630 if self._writing: 1631 raise ValueError("Can't write to the ZIP file while there is " 1632 "another write handle open on it. " 1633 "Close the first handle before opening another.") 1634 1635 # Size and CRC are overwritten with correct data after processing the file 1636 zinfo.compress_size = 0 1637 zinfo.CRC = 0 1638 1639 zinfo.flag_bits = 0x00 1640 if zinfo.compress_type == ZIP_LZMA: 1641 # Compressed data includes an end-of-stream (EOS) marker 1642 zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1 1643 if not self._seekable: 1644 zinfo.flag_bits |= _MASK_USE_DATA_DESCRIPTOR 1645 1646 if not zinfo.external_attr: 1647 zinfo.external_attr = 0o600 << 16 # permissions: ?rw------- 1648 1649 # Compressed size can be larger than uncompressed size 1650 zip64 = force_zip64 or (zinfo.file_size * 1.05 > ZIP64_LIMIT) 1651 if not self._allowZip64 and zip64: 1652 raise LargeZipFile("Filesize would require ZIP64 extensions") 1653 1654 if self._seekable: 1655 self.fp.seek(self.start_dir) 1656 zinfo.header_offset = self.fp.tell() 1657 1658 self._writecheck(zinfo) 1659 self._didModify = True 1660 1661 self.fp.write(zinfo.FileHeader(zip64)) 1662 1663 self._writing = True 1664 return _ZipWriteFile(self, zinfo, zip64) 1665 1666 def extract(self, member, path=None, pwd=None): 1667 """Extract a member from the archive to the current working directory, 1668 using its full name. Its file information is extracted as accurately 1669 as possible. `member' may be a filename or a ZipInfo object. You can 1670 specify a different directory using `path'. 1671 """ 1672 if path is None: 1673 path = os.getcwd() 1674 else: 1675 path = os.fspath(path) 1676 1677 return self._extract_member(member, path, pwd) 1678 1679 def extractall(self, path=None, members=None, pwd=None): 1680 """Extract all members from the archive to the current working 1681 directory. `path' specifies a different directory to extract to. 1682 `members' is optional and must be a subset of the list returned 1683 by namelist(). 1684 """ 1685 if members is None: 1686 members = self.namelist() 1687 1688 if path is None: 1689 path = os.getcwd() 1690 else: 1691 path = os.fspath(path) 1692 1693 for zipinfo in members: 1694 self._extract_member(zipinfo, path, pwd) 1695 1696 @classmethod 1697 def _sanitize_windows_name(cls, arcname, pathsep): 1698 """Replace bad characters and remove trailing dots from parts.""" 1699 table = cls._windows_illegal_name_trans_table 1700 if not table: 1701 illegal = ':<>|"?*' 1702 table = str.maketrans(illegal, '_' * len(illegal)) 1703 cls._windows_illegal_name_trans_table = table 1704 arcname = arcname.translate(table) 1705 # remove trailing dots 1706 arcname = (x.rstrip('.') for x in arcname.split(pathsep)) 1707 # rejoin, removing empty parts. 1708 arcname = pathsep.join(x for x in arcname if x) 1709 return arcname 1710 1711 def _extract_member(self, member, targetpath, pwd): 1712 """Extract the ZipInfo object 'member' to a physical 1713 file on the path targetpath. 1714 """ 1715 if not isinstance(member, ZipInfo): 1716 member = self.getinfo(member) 1717 1718 # build the destination pathname, replacing 1719 # forward slashes to platform specific separators. 1720 arcname = member.filename.replace('/', os.path.sep) 1721 1722 if os.path.altsep: 1723 arcname = arcname.replace(os.path.altsep, os.path.sep) 1724 # interpret absolute pathname as relative, remove drive letter or 1725 # UNC path, redundant separators, "." and ".." components. 1726 arcname = os.path.splitdrive(arcname)[1] 1727 invalid_path_parts = ('', os.path.curdir, os.path.pardir) 1728 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) 1729 if x not in invalid_path_parts) 1730 if os.path.sep == '\\': 1731 # filter illegal characters on Windows 1732 arcname = self._sanitize_windows_name(arcname, os.path.sep) 1733 1734 targetpath = os.path.join(targetpath, arcname) 1735 targetpath = os.path.normpath(targetpath) 1736 1737 # Create all upper directories if necessary. 1738 upperdirs = os.path.dirname(targetpath) 1739 if upperdirs and not os.path.exists(upperdirs): 1740 os.makedirs(upperdirs) 1741 1742 if member.is_dir(): 1743 if not os.path.isdir(targetpath): 1744 os.mkdir(targetpath) 1745 return targetpath 1746 1747 with self.open(member, pwd=pwd) as source, \ 1748 open(targetpath, "wb") as target: 1749 shutil.copyfileobj(source, target) 1750 1751 return targetpath 1752 1753 def _writecheck(self, zinfo): 1754 """Check for errors before writing a file to the archive.""" 1755 if zinfo.filename in self.NameToInfo: 1756 import warnings 1757 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3) 1758 if self.mode not in ('w', 'x', 'a'): 1759 raise ValueError("write() requires mode 'w', 'x', or 'a'") 1760 if not self.fp: 1761 raise ValueError( 1762 "Attempt to write ZIP archive that was already closed") 1763 _check_compression(zinfo.compress_type) 1764 if not self._allowZip64: 1765 requires_zip64 = None 1766 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT: 1767 requires_zip64 = "Files count" 1768 elif zinfo.file_size > ZIP64_LIMIT: 1769 requires_zip64 = "Filesize" 1770 elif zinfo.header_offset > ZIP64_LIMIT: 1771 requires_zip64 = "Zipfile size" 1772 if requires_zip64: 1773 raise LargeZipFile(requires_zip64 + 1774 " would require ZIP64 extensions") 1775 1776 def write(self, filename, arcname=None, 1777 compress_type=None, compresslevel=None): 1778 """Put the bytes from filename into the archive under the name 1779 arcname.""" 1780 if not self.fp: 1781 raise ValueError( 1782 "Attempt to write to ZIP archive that was already closed") 1783 if self._writing: 1784 raise ValueError( 1785 "Can't write to ZIP archive while an open writing handle exists" 1786 ) 1787 1788 zinfo = ZipInfo.from_file(filename, arcname, 1789 strict_timestamps=self._strict_timestamps) 1790 1791 if zinfo.is_dir(): 1792 zinfo.compress_size = 0 1793 zinfo.CRC = 0 1794 self.mkdir(zinfo) 1795 else: 1796 if compress_type is not None: 1797 zinfo.compress_type = compress_type 1798 else: 1799 zinfo.compress_type = self.compression 1800 1801 if compresslevel is not None: 1802 zinfo._compresslevel = compresslevel 1803 else: 1804 zinfo._compresslevel = self.compresslevel 1805 1806 with open(filename, "rb") as src, self.open(zinfo, 'w') as dest: 1807 shutil.copyfileobj(src, dest, 1024*8) 1808 1809 def writestr(self, zinfo_or_arcname, data, 1810 compress_type=None, compresslevel=None): 1811 """Write a file into the archive. The contents is 'data', which 1812 may be either a 'str' or a 'bytes' instance; if it is a 'str', 1813 it is encoded as UTF-8 first. 1814 'zinfo_or_arcname' is either a ZipInfo instance or 1815 the name of the file in the archive.""" 1816 if isinstance(data, str): 1817 data = data.encode("utf-8") 1818 if not isinstance(zinfo_or_arcname, ZipInfo): 1819 zinfo = ZipInfo(filename=zinfo_or_arcname, 1820 date_time=time.localtime(time.time())[:6]) 1821 zinfo.compress_type = self.compression 1822 zinfo._compresslevel = self.compresslevel 1823 if zinfo.filename[-1] == '/': 1824 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x 1825 zinfo.external_attr |= 0x10 # MS-DOS directory flag 1826 else: 1827 zinfo.external_attr = 0o600 << 16 # ?rw------- 1828 else: 1829 zinfo = zinfo_or_arcname 1830 1831 if not self.fp: 1832 raise ValueError( 1833 "Attempt to write to ZIP archive that was already closed") 1834 if self._writing: 1835 raise ValueError( 1836 "Can't write to ZIP archive while an open writing handle exists." 1837 ) 1838 1839 if compress_type is not None: 1840 zinfo.compress_type = compress_type 1841 1842 if compresslevel is not None: 1843 zinfo._compresslevel = compresslevel 1844 1845 zinfo.file_size = len(data) # Uncompressed size 1846 with self._lock: 1847 with self.open(zinfo, mode='w') as dest: 1848 dest.write(data) 1849 1850 def mkdir(self, zinfo_or_directory_name, mode=511): 1851 """Creates a directory inside the zip archive.""" 1852 if isinstance(zinfo_or_directory_name, ZipInfo): 1853 zinfo = zinfo_or_directory_name 1854 if not zinfo.is_dir(): 1855 raise ValueError("The given ZipInfo does not describe a directory") 1856 elif isinstance(zinfo_or_directory_name, str): 1857 directory_name = zinfo_or_directory_name 1858 if not directory_name.endswith("/"): 1859 directory_name += "/" 1860 zinfo = ZipInfo(directory_name) 1861 zinfo.compress_size = 0 1862 zinfo.CRC = 0 1863 zinfo.external_attr = ((0o40000 | mode) & 0xFFFF) << 16 1864 zinfo.file_size = 0 1865 zinfo.external_attr |= 0x10 1866 else: 1867 raise TypeError("Expected type str or ZipInfo") 1868 1869 with self._lock: 1870 if self._seekable: 1871 self.fp.seek(self.start_dir) 1872 zinfo.header_offset = self.fp.tell() # Start of header bytes 1873 if zinfo.compress_type == ZIP_LZMA: 1874 # Compressed data includes an end-of-stream (EOS) marker 1875 zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1 1876 1877 self._writecheck(zinfo) 1878 self._didModify = True 1879 1880 self.filelist.append(zinfo) 1881 self.NameToInfo[zinfo.filename] = zinfo 1882 self.fp.write(zinfo.FileHeader(False)) 1883 self.start_dir = self.fp.tell() 1884 1885 def __del__(self): 1886 """Call the "close()" method in case the user forgot.""" 1887 self.close() 1888 1889 def close(self): 1890 """Close the file, and for mode 'w', 'x' and 'a' write the ending 1891 records.""" 1892 if self.fp is None: 1893 return 1894 1895 if self._writing: 1896 raise ValueError("Can't close the ZIP file while there is " 1897 "an open writing handle on it. " 1898 "Close the writing handle before closing the zip.") 1899 1900 try: 1901 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records 1902 with self._lock: 1903 if self._seekable: 1904 self.fp.seek(self.start_dir) 1905 self._write_end_record() 1906 finally: 1907 fp = self.fp 1908 self.fp = None 1909 self._fpclose(fp) 1910 1911 def _write_end_record(self): 1912 for zinfo in self.filelist: # write central directory 1913 dt = zinfo.date_time 1914 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 1915 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 1916 extra = [] 1917 if zinfo.file_size > ZIP64_LIMIT \ 1918 or zinfo.compress_size > ZIP64_LIMIT: 1919 extra.append(zinfo.file_size) 1920 extra.append(zinfo.compress_size) 1921 file_size = 0xffffffff 1922 compress_size = 0xffffffff 1923 else: 1924 file_size = zinfo.file_size 1925 compress_size = zinfo.compress_size 1926 1927 if zinfo.header_offset > ZIP64_LIMIT: 1928 extra.append(zinfo.header_offset) 1929 header_offset = 0xffffffff 1930 else: 1931 header_offset = zinfo.header_offset 1932 1933 extra_data = zinfo.extra 1934 min_version = 0 1935 if extra: 1936 # Append a ZIP64 field to the extra's 1937 extra_data = _strip_extra(extra_data, (1,)) 1938 extra_data = struct.pack( 1939 '<HH' + 'Q'*len(extra), 1940 1, 8*len(extra), *extra) + extra_data 1941 1942 min_version = ZIP64_VERSION 1943 1944 if zinfo.compress_type == ZIP_BZIP2: 1945 min_version = max(BZIP2_VERSION, min_version) 1946 elif zinfo.compress_type == ZIP_LZMA: 1947 min_version = max(LZMA_VERSION, min_version) 1948 1949 extract_version = max(min_version, zinfo.extract_version) 1950 create_version = max(min_version, zinfo.create_version) 1951 filename, flag_bits = zinfo._encodeFilenameFlags() 1952 centdir = struct.pack(structCentralDir, 1953 stringCentralDir, create_version, 1954 zinfo.create_system, extract_version, zinfo.reserved, 1955 flag_bits, zinfo.compress_type, dostime, dosdate, 1956 zinfo.CRC, compress_size, file_size, 1957 len(filename), len(extra_data), len(zinfo.comment), 1958 0, zinfo.internal_attr, zinfo.external_attr, 1959 header_offset) 1960 self.fp.write(centdir) 1961 self.fp.write(filename) 1962 self.fp.write(extra_data) 1963 self.fp.write(zinfo.comment) 1964 1965 pos2 = self.fp.tell() 1966 # Write end-of-zip-archive record 1967 centDirCount = len(self.filelist) 1968 centDirSize = pos2 - self.start_dir 1969 centDirOffset = self.start_dir 1970 requires_zip64 = None 1971 if centDirCount > ZIP_FILECOUNT_LIMIT: 1972 requires_zip64 = "Files count" 1973 elif centDirOffset > ZIP64_LIMIT: 1974 requires_zip64 = "Central directory offset" 1975 elif centDirSize > ZIP64_LIMIT: 1976 requires_zip64 = "Central directory size" 1977 if requires_zip64: 1978 # Need to write the ZIP64 end-of-archive records 1979 if not self._allowZip64: 1980 raise LargeZipFile(requires_zip64 + 1981 " would require ZIP64 extensions") 1982 zip64endrec = struct.pack( 1983 structEndArchive64, stringEndArchive64, 1984 44, 45, 45, 0, 0, centDirCount, centDirCount, 1985 centDirSize, centDirOffset) 1986 self.fp.write(zip64endrec) 1987 1988 zip64locrec = struct.pack( 1989 structEndArchive64Locator, 1990 stringEndArchive64Locator, 0, pos2, 1) 1991 self.fp.write(zip64locrec) 1992 centDirCount = min(centDirCount, 0xFFFF) 1993 centDirSize = min(centDirSize, 0xFFFFFFFF) 1994 centDirOffset = min(centDirOffset, 0xFFFFFFFF) 1995 1996 endrec = struct.pack(structEndArchive, stringEndArchive, 1997 0, 0, centDirCount, centDirCount, 1998 centDirSize, centDirOffset, len(self._comment)) 1999 self.fp.write(endrec) 2000 self.fp.write(self._comment) 2001 if self.mode == "a": 2002 self.fp.truncate() 2003 self.fp.flush() 2004 2005 def _fpclose(self, fp): 2006 assert self._fileRefCnt > 0 2007 self._fileRefCnt -= 1 2008 if not self._fileRefCnt and not self._filePassed: 2009 fp.close() 2010 2011 2012class PyZipFile(ZipFile): 2013 """Class to create ZIP archives with Python library files and packages.""" 2014 2015 def __init__(self, file, mode="r", compression=ZIP_STORED, 2016 allowZip64=True, optimize=-1): 2017 ZipFile.__init__(self, file, mode=mode, compression=compression, 2018 allowZip64=allowZip64) 2019 self._optimize = optimize 2020 2021 def writepy(self, pathname, basename="", filterfunc=None): 2022 """Add all files from "pathname" to the ZIP archive. 2023 2024 If pathname is a package directory, search the directory and 2025 all package subdirectories recursively for all *.py and enter 2026 the modules into the archive. If pathname is a plain 2027 directory, listdir *.py and enter all modules. Else, pathname 2028 must be a Python *.py file and the module will be put into the 2029 archive. Added modules are always module.pyc. 2030 This method will compile the module.py into module.pyc if 2031 necessary. 2032 If filterfunc(pathname) is given, it is called with every argument. 2033 When it is False, the file or directory is skipped. 2034 """ 2035 pathname = os.fspath(pathname) 2036 if filterfunc and not filterfunc(pathname): 2037 if self.debug: 2038 label = 'path' if os.path.isdir(pathname) else 'file' 2039 print('%s %r skipped by filterfunc' % (label, pathname)) 2040 return 2041 dir, name = os.path.split(pathname) 2042 if os.path.isdir(pathname): 2043 initname = os.path.join(pathname, "__init__.py") 2044 if os.path.isfile(initname): 2045 # This is a package directory, add it 2046 if basename: 2047 basename = "%s/%s" % (basename, name) 2048 else: 2049 basename = name 2050 if self.debug: 2051 print("Adding package in", pathname, "as", basename) 2052 fname, arcname = self._get_codename(initname[0:-3], basename) 2053 if self.debug: 2054 print("Adding", arcname) 2055 self.write(fname, arcname) 2056 dirlist = sorted(os.listdir(pathname)) 2057 dirlist.remove("__init__.py") 2058 # Add all *.py files and package subdirectories 2059 for filename in dirlist: 2060 path = os.path.join(pathname, filename) 2061 root, ext = os.path.splitext(filename) 2062 if os.path.isdir(path): 2063 if os.path.isfile(os.path.join(path, "__init__.py")): 2064 # This is a package directory, add it 2065 self.writepy(path, basename, 2066 filterfunc=filterfunc) # Recursive call 2067 elif ext == ".py": 2068 if filterfunc and not filterfunc(path): 2069 if self.debug: 2070 print('file %r skipped by filterfunc' % path) 2071 continue 2072 fname, arcname = self._get_codename(path[0:-3], 2073 basename) 2074 if self.debug: 2075 print("Adding", arcname) 2076 self.write(fname, arcname) 2077 else: 2078 # This is NOT a package directory, add its files at top level 2079 if self.debug: 2080 print("Adding files from directory", pathname) 2081 for filename in sorted(os.listdir(pathname)): 2082 path = os.path.join(pathname, filename) 2083 root, ext = os.path.splitext(filename) 2084 if ext == ".py": 2085 if filterfunc and not filterfunc(path): 2086 if self.debug: 2087 print('file %r skipped by filterfunc' % path) 2088 continue 2089 fname, arcname = self._get_codename(path[0:-3], 2090 basename) 2091 if self.debug: 2092 print("Adding", arcname) 2093 self.write(fname, arcname) 2094 else: 2095 if pathname[-3:] != ".py": 2096 raise RuntimeError( 2097 'Files added with writepy() must end with ".py"') 2098 fname, arcname = self._get_codename(pathname[0:-3], basename) 2099 if self.debug: 2100 print("Adding file", arcname) 2101 self.write(fname, arcname) 2102 2103 def _get_codename(self, pathname, basename): 2104 """Return (filename, archivename) for the path. 2105 2106 Given a module name path, return the correct file path and 2107 archive name, compiling if necessary. For example, given 2108 /python/lib/string, return (/python/lib/string.pyc, string). 2109 """ 2110 def _compile(file, optimize=-1): 2111 import py_compile 2112 if self.debug: 2113 print("Compiling", file) 2114 try: 2115 py_compile.compile(file, doraise=True, optimize=optimize) 2116 except py_compile.PyCompileError as err: 2117 print(err.msg) 2118 return False 2119 return True 2120 2121 file_py = pathname + ".py" 2122 file_pyc = pathname + ".pyc" 2123 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='') 2124 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1) 2125 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2) 2126 if self._optimize == -1: 2127 # legacy mode: use whatever file is present 2128 if (os.path.isfile(file_pyc) and 2129 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime): 2130 # Use .pyc file. 2131 arcname = fname = file_pyc 2132 elif (os.path.isfile(pycache_opt0) and 2133 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime): 2134 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2135 # file name in the archive. 2136 fname = pycache_opt0 2137 arcname = file_pyc 2138 elif (os.path.isfile(pycache_opt1) and 2139 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime): 2140 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2141 # file name in the archive. 2142 fname = pycache_opt1 2143 arcname = file_pyc 2144 elif (os.path.isfile(pycache_opt2) and 2145 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime): 2146 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2147 # file name in the archive. 2148 fname = pycache_opt2 2149 arcname = file_pyc 2150 else: 2151 # Compile py into PEP 3147 pyc file. 2152 if _compile(file_py): 2153 if sys.flags.optimize == 0: 2154 fname = pycache_opt0 2155 elif sys.flags.optimize == 1: 2156 fname = pycache_opt1 2157 else: 2158 fname = pycache_opt2 2159 arcname = file_pyc 2160 else: 2161 fname = arcname = file_py 2162 else: 2163 # new mode: use given optimization level 2164 if self._optimize == 0: 2165 fname = pycache_opt0 2166 arcname = file_pyc 2167 else: 2168 arcname = file_pyc 2169 if self._optimize == 1: 2170 fname = pycache_opt1 2171 elif self._optimize == 2: 2172 fname = pycache_opt2 2173 else: 2174 msg = "invalid value for 'optimize': {!r}".format(self._optimize) 2175 raise ValueError(msg) 2176 if not (os.path.isfile(fname) and 2177 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime): 2178 if not _compile(file_py, optimize=self._optimize): 2179 fname = arcname = file_py 2180 archivename = os.path.split(arcname)[1] 2181 if basename: 2182 archivename = "%s/%s" % (basename, archivename) 2183 return (fname, archivename) 2184 2185 2186def _parents(path): 2187 """ 2188 Given a path with elements separated by 2189 posixpath.sep, generate all parents of that path. 2190 2191 >>> list(_parents('b/d')) 2192 ['b'] 2193 >>> list(_parents('/b/d/')) 2194 ['/b'] 2195 >>> list(_parents('b/d/f/')) 2196 ['b/d', 'b'] 2197 >>> list(_parents('b')) 2198 [] 2199 >>> list(_parents('')) 2200 [] 2201 """ 2202 return itertools.islice(_ancestry(path), 1, None) 2203 2204 2205def _ancestry(path): 2206 """ 2207 Given a path with elements separated by 2208 posixpath.sep, generate all elements of that path 2209 2210 >>> list(_ancestry('b/d')) 2211 ['b/d', 'b'] 2212 >>> list(_ancestry('/b/d/')) 2213 ['/b/d', '/b'] 2214 >>> list(_ancestry('b/d/f/')) 2215 ['b/d/f', 'b/d', 'b'] 2216 >>> list(_ancestry('b')) 2217 ['b'] 2218 >>> list(_ancestry('')) 2219 [] 2220 """ 2221 path = path.rstrip(posixpath.sep) 2222 while path and path != posixpath.sep: 2223 yield path 2224 path, tail = posixpath.split(path) 2225 2226 2227_dedupe = dict.fromkeys 2228"""Deduplicate an iterable in original order""" 2229 2230 2231def _difference(minuend, subtrahend): 2232 """ 2233 Return items in minuend not in subtrahend, retaining order 2234 with O(1) lookup. 2235 """ 2236 return itertools.filterfalse(set(subtrahend).__contains__, minuend) 2237 2238 2239class SanitizedNames: 2240 """ 2241 ZipFile mix-in to ensure names are sanitized. 2242 """ 2243 2244 def namelist(self): 2245 return list(map(self._sanitize, super().namelist())) 2246 2247 @staticmethod 2248 def _sanitize(name): 2249 r""" 2250 Ensure a relative path with posix separators and no dot names. 2251 Modeled after 2252 https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813 2253 but provides consistent cross-platform behavior. 2254 >>> san = SanitizedNames._sanitize 2255 >>> san('/foo/bar') 2256 'foo/bar' 2257 >>> san('//foo.txt') 2258 'foo.txt' 2259 >>> san('foo/.././bar.txt') 2260 'foo/bar.txt' 2261 >>> san('foo../.bar.txt') 2262 'foo../.bar.txt' 2263 >>> san('\\foo\\bar.txt') 2264 'foo/bar.txt' 2265 >>> san('D:\\foo.txt') 2266 'D/foo.txt' 2267 >>> san('\\\\server\\share\\file.txt') 2268 'server/share/file.txt' 2269 >>> san('\\\\?\\GLOBALROOT\\Volume3') 2270 '?/GLOBALROOT/Volume3' 2271 >>> san('\\\\.\\PhysicalDrive1\\root') 2272 'PhysicalDrive1/root' 2273 Retain any trailing slash. 2274 >>> san('abc/') 2275 'abc/' 2276 Raises a ValueError if the result is empty. 2277 >>> san('../..') 2278 Traceback (most recent call last): 2279 ... 2280 ValueError: Empty filename 2281 """ 2282 2283 def allowed(part): 2284 return part and part not in {'..', '.'} 2285 2286 # Remove the drive letter. 2287 # Don't use ntpath.splitdrive, because that also strips UNC paths 2288 bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE) 2289 clean = bare.replace('\\', '/') 2290 parts = clean.split('/') 2291 joined = '/'.join(filter(allowed, parts)) 2292 if not joined: 2293 raise ValueError("Empty filename") 2294 return joined + '/' * name.endswith('/') 2295 2296 2297class CompleteDirs(SanitizedNames, ZipFile): 2298 """ 2299 A ZipFile subclass that ensures that implied directories 2300 are always included in the namelist. 2301 """ 2302 2303 @staticmethod 2304 def _implied_dirs(names): 2305 parents = itertools.chain.from_iterable(map(_parents, names)) 2306 as_dirs = (p + posixpath.sep for p in parents) 2307 return _dedupe(_difference(as_dirs, names)) 2308 2309 def namelist(self): 2310 names = super(CompleteDirs, self).namelist() 2311 return names + list(self._implied_dirs(names)) 2312 2313 def _name_set(self): 2314 return set(self.namelist()) 2315 2316 def resolve_dir(self, name): 2317 """ 2318 If the name represents a directory, return that name 2319 as a directory (with the trailing slash). 2320 """ 2321 names = self._name_set() 2322 dirname = name + '/' 2323 dir_match = name not in names and dirname in names 2324 return dirname if dir_match else name 2325 2326 def getinfo(self, name): 2327 """ 2328 Supplement getinfo for implied dirs. 2329 """ 2330 try: 2331 return super().getinfo(name) 2332 except KeyError: 2333 if not name.endswith('/') or name not in self._name_set(): 2334 raise 2335 return ZipInfo(filename=name) 2336 2337 @classmethod 2338 def make(cls, source): 2339 """ 2340 Given a source (filename or zipfile), return an 2341 appropriate CompleteDirs subclass. 2342 """ 2343 if isinstance(source, CompleteDirs): 2344 return source 2345 2346 if not isinstance(source, ZipFile): 2347 return cls(source) 2348 2349 # Only allow for FastLookup when supplied zipfile is read-only 2350 if 'r' not in source.mode: 2351 cls = CompleteDirs 2352 2353 source.__class__ = cls 2354 return source 2355 2356 2357class FastLookup(CompleteDirs): 2358 """ 2359 ZipFile subclass to ensure implicit 2360 dirs exist and are resolved rapidly. 2361 """ 2362 2363 def namelist(self): 2364 with contextlib.suppress(AttributeError): 2365 return self.__names 2366 self.__names = super(FastLookup, self).namelist() 2367 return self.__names 2368 2369 def _name_set(self): 2370 with contextlib.suppress(AttributeError): 2371 return self.__lookup 2372 self.__lookup = super(FastLookup, self)._name_set() 2373 return self.__lookup 2374 2375 2376def _extract_text_encoding(encoding=None, *args, **kwargs): 2377 # stacklevel=3 so that the caller of the caller see any warning. 2378 return io.text_encoding(encoding, 3), args, kwargs 2379 2380 2381class Path: 2382 """ 2383 A pathlib-compatible interface for zip files. 2384 2385 Consider a zip file with this structure:: 2386 2387 . 2388 ├── a.txt 2389 └── b 2390 ├── c.txt 2391 └── d 2392 └── e.txt 2393 2394 >>> data = io.BytesIO() 2395 >>> zf = ZipFile(data, 'w') 2396 >>> zf.writestr('a.txt', 'content of a') 2397 >>> zf.writestr('b/c.txt', 'content of c') 2398 >>> zf.writestr('b/d/e.txt', 'content of e') 2399 >>> zf.filename = 'mem/abcde.zip' 2400 2401 Path accepts the zipfile object itself or a filename 2402 2403 >>> root = Path(zf) 2404 2405 From there, several path operations are available. 2406 2407 Directory iteration (including the zip file itself): 2408 2409 >>> a, b = root.iterdir() 2410 >>> a 2411 Path('mem/abcde.zip', 'a.txt') 2412 >>> b 2413 Path('mem/abcde.zip', 'b/') 2414 2415 name property: 2416 2417 >>> b.name 2418 'b' 2419 2420 join with divide operator: 2421 2422 >>> c = b / 'c.txt' 2423 >>> c 2424 Path('mem/abcde.zip', 'b/c.txt') 2425 >>> c.name 2426 'c.txt' 2427 2428 Read text: 2429 2430 >>> c.read_text() 2431 'content of c' 2432 2433 existence: 2434 2435 >>> c.exists() 2436 True 2437 >>> (b / 'missing.txt').exists() 2438 False 2439 2440 Coercion to string: 2441 2442 >>> import os 2443 >>> str(c).replace(os.sep, posixpath.sep) 2444 'mem/abcde.zip/b/c.txt' 2445 2446 At the root, ``name``, ``filename``, and ``parent`` 2447 resolve to the zipfile. Note these attributes are not 2448 valid and will raise a ``ValueError`` if the zipfile 2449 has no filename. 2450 2451 >>> root.name 2452 'abcde.zip' 2453 >>> str(root.filename).replace(os.sep, posixpath.sep) 2454 'mem/abcde.zip' 2455 >>> str(root.parent) 2456 'mem' 2457 """ 2458 2459 __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" 2460 2461 def __init__(self, root, at=""): 2462 """ 2463 Construct a Path from a ZipFile or filename. 2464 2465 Note: When the source is an existing ZipFile object, 2466 its type (__class__) will be mutated to a 2467 specialized type. If the caller wishes to retain the 2468 original type, the caller should either create a 2469 separate ZipFile object or pass a filename. 2470 """ 2471 self.root = FastLookup.make(root) 2472 self.at = at 2473 2474 def open(self, mode='r', *args, pwd=None, **kwargs): 2475 """ 2476 Open this entry as text or binary following the semantics 2477 of ``pathlib.Path.open()`` by passing arguments through 2478 to io.TextIOWrapper(). 2479 """ 2480 if self.is_dir(): 2481 raise IsADirectoryError(self) 2482 zip_mode = mode[0] 2483 if not self.exists() and zip_mode == 'r': 2484 raise FileNotFoundError(self) 2485 stream = self.root.open(self.at, zip_mode, pwd=pwd) 2486 if 'b' in mode: 2487 if args or kwargs: 2488 raise ValueError("encoding args invalid for binary operation") 2489 return stream 2490 # Text mode: 2491 encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) 2492 return io.TextIOWrapper(stream, encoding, *args, **kwargs) 2493 2494 @property 2495 def name(self): 2496 return pathlib.Path(self.at).name or self.filename.name 2497 2498 @property 2499 def suffix(self): 2500 return pathlib.Path(self.at).suffix or self.filename.suffix 2501 2502 @property 2503 def suffixes(self): 2504 return pathlib.Path(self.at).suffixes or self.filename.suffixes 2505 2506 @property 2507 def stem(self): 2508 return pathlib.Path(self.at).stem or self.filename.stem 2509 2510 @property 2511 def filename(self): 2512 return pathlib.Path(self.root.filename).joinpath(self.at) 2513 2514 def read_text(self, *args, **kwargs): 2515 encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) 2516 with self.open('r', encoding, *args, **kwargs) as strm: 2517 return strm.read() 2518 2519 def read_bytes(self): 2520 with self.open('rb') as strm: 2521 return strm.read() 2522 2523 def _is_child(self, path): 2524 return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") 2525 2526 def _next(self, at): 2527 return self.__class__(self.root, at) 2528 2529 def is_dir(self): 2530 return not self.at or self.at.endswith("/") 2531 2532 def is_file(self): 2533 return self.exists() and not self.is_dir() 2534 2535 def exists(self): 2536 return self.at in self.root._name_set() 2537 2538 def iterdir(self): 2539 if not self.is_dir(): 2540 raise ValueError("Can't listdir a file") 2541 subs = map(self._next, self.root.namelist()) 2542 return filter(self._is_child, subs) 2543 2544 def __str__(self): 2545 return posixpath.join(self.root.filename, self.at) 2546 2547 def __repr__(self): 2548 return self.__repr.format(self=self) 2549 2550 def joinpath(self, *other): 2551 next = posixpath.join(self.at, *other) 2552 return self._next(self.root.resolve_dir(next)) 2553 2554 __truediv__ = joinpath 2555 2556 @property 2557 def parent(self): 2558 if not self.at: 2559 return self.filename.parent 2560 parent_at = posixpath.dirname(self.at.rstrip('/')) 2561 if parent_at: 2562 parent_at += '/' 2563 return self._next(parent_at) 2564 2565 2566def main(args=None): 2567 import argparse 2568 2569 description = 'A simple command-line interface for zipfile module.' 2570 parser = argparse.ArgumentParser(description=description) 2571 group = parser.add_mutually_exclusive_group(required=True) 2572 group.add_argument('-l', '--list', metavar='<zipfile>', 2573 help='Show listing of a zipfile') 2574 group.add_argument('-e', '--extract', nargs=2, 2575 metavar=('<zipfile>', '<output_dir>'), 2576 help='Extract zipfile into target dir') 2577 group.add_argument('-c', '--create', nargs='+', 2578 metavar=('<name>', '<file>'), 2579 help='Create zipfile from sources') 2580 group.add_argument('-t', '--test', metavar='<zipfile>', 2581 help='Test if a zipfile is valid') 2582 parser.add_argument('--metadata-encoding', metavar='<encoding>', 2583 help='Specify encoding of member names for -l, -e and -t') 2584 args = parser.parse_args(args) 2585 2586 encoding = args.metadata_encoding 2587 2588 if args.test is not None: 2589 src = args.test 2590 with ZipFile(src, 'r', metadata_encoding=encoding) as zf: 2591 badfile = zf.testzip() 2592 if badfile: 2593 print("The following enclosed file is corrupted: {!r}".format(badfile)) 2594 print("Done testing") 2595 2596 elif args.list is not None: 2597 src = args.list 2598 with ZipFile(src, 'r', metadata_encoding=encoding) as zf: 2599 zf.printdir() 2600 2601 elif args.extract is not None: 2602 src, curdir = args.extract 2603 with ZipFile(src, 'r', metadata_encoding=encoding) as zf: 2604 zf.extractall(curdir) 2605 2606 elif args.create is not None: 2607 if encoding: 2608 print("Non-conforming encodings not supported with -c.", 2609 file=sys.stderr) 2610 sys.exit(1) 2611 2612 zip_name = args.create.pop(0) 2613 files = args.create 2614 2615 def addToZip(zf, path, zippath): 2616 if os.path.isfile(path): 2617 zf.write(path, zippath, ZIP_DEFLATED) 2618 elif os.path.isdir(path): 2619 if zippath: 2620 zf.write(path, zippath) 2621 for nm in sorted(os.listdir(path)): 2622 addToZip(zf, 2623 os.path.join(path, nm), os.path.join(zippath, nm)) 2624 # else: ignore 2625 2626 with ZipFile(zip_name, 'w') as zf: 2627 for path in files: 2628 zippath = os.path.basename(path) 2629 if not zippath: 2630 zippath = os.path.basename(os.path.dirname(path)) 2631 if zippath in ('', os.curdir, os.pardir): 2632 zippath = '' 2633 addToZip(zf, path, zippath) 2634 2635 2636if __name__ == "__main__": 2637 main() 2638