xref: /third_party/python/Lib/zipfile.py (revision 7db96d56)
1"""
2Read and write ZIP files.
3
4XXX references to utf-8 need further investigation.
5"""
6import binascii
7import importlib.util
8import io
9import itertools
10import os
11import posixpath
12import re
13import shutil
14import stat
15import struct
16import sys
17import threading
18import time
19import contextlib
20import pathlib
21
22try:
23    import zlib # We may need its compression method
24    crc32 = zlib.crc32
25except ImportError:
26    zlib = None
27    crc32 = binascii.crc32
28
29try:
30    import bz2 # We may need its compression method
31except ImportError:
32    bz2 = None
33
34try:
35    import lzma # We may need its compression method
36except ImportError:
37    lzma = None
38
39__all__ = ["BadZipFile", "BadZipfile", "error",
40           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
41           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
42           "Path"]
43
44class BadZipFile(Exception):
45    pass
46
47
48class LargeZipFile(Exception):
49    """
50    Raised when writing a zipfile that requires ZIP64 extensions while
51    those extensions are disabled.
52    """
53
54error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
55
56
57ZIP64_LIMIT = (1 << 31) - 1
58ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
59ZIP_MAX_COMMENT = (1 << 16) - 1
60
61# constants for Zip file compression methods
62ZIP_STORED = 0
63ZIP_DEFLATED = 8
64ZIP_BZIP2 = 12
65ZIP_LZMA = 14
66# Other ZIP compression methods not supported
67
68DEFAULT_VERSION = 20
69ZIP64_VERSION = 45
70BZIP2_VERSION = 46
71LZMA_VERSION = 63
72# we recognize (but not necessarily support) all features up to that version
73MAX_EXTRACT_VERSION = 63
74
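# Illustrative example (not part of the original module): the compression
# constants above are passed to ZipFile via its "compression" argument, and
# the chosen method needs its backing module to be importable (see
# _check_compression further down).  "archive.zip" and "notes.txt" are
# hypothetical names.
#
#     with ZipFile("archive.zip", "w", compression=ZIP_DEFLATED) as zf:
#         zf.write("notes.txt")
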
75# Below are some formats and associated data for reading/writing headers using
76# the struct module.  The names and structures of headers/records are those used
77# in the PKWARE description of the ZIP file format:
78#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
79# (URL valid as of January 2008)
80
81# The "end of central directory" structure, magic number, size, and indices
82# (section V.I in the format document)
83structEndArchive = b"<4s4H2LH"
84stringEndArchive = b"PK\005\006"
85sizeEndCentDir = struct.calcsize(structEndArchive)
86
87_ECD_SIGNATURE = 0
88_ECD_DISK_NUMBER = 1
89_ECD_DISK_START = 2
90_ECD_ENTRIES_THIS_DISK = 3
91_ECD_ENTRIES_TOTAL = 4
92_ECD_SIZE = 5
93_ECD_OFFSET = 6
94_ECD_COMMENT_SIZE = 7
95# These last two indices are not part of the structure as defined in the
96# spec, but they are used internally by this module as a convenience
97_ECD_COMMENT = 8
98_ECD_LOCATION = 9
99
100# The "central directory" structure, magic number, size, and indices
101# of entries in the structure (section V.F in the format document)
102structCentralDir = "<4s4B4HL2L5H2L"
103stringCentralDir = b"PK\001\002"
104sizeCentralDir = struct.calcsize(structCentralDir)
105
106# indexes of entries in the central directory structure
107_CD_SIGNATURE = 0
108_CD_CREATE_VERSION = 1
109_CD_CREATE_SYSTEM = 2
110_CD_EXTRACT_VERSION = 3
111_CD_EXTRACT_SYSTEM = 4
112_CD_FLAG_BITS = 5
113_CD_COMPRESS_TYPE = 6
114_CD_TIME = 7
115_CD_DATE = 8
116_CD_CRC = 9
117_CD_COMPRESSED_SIZE = 10
118_CD_UNCOMPRESSED_SIZE = 11
119_CD_FILENAME_LENGTH = 12
120_CD_EXTRA_FIELD_LENGTH = 13
121_CD_COMMENT_LENGTH = 14
122_CD_DISK_NUMBER_START = 15
123_CD_INTERNAL_FILE_ATTRIBUTES = 16
124_CD_EXTERNAL_FILE_ATTRIBUTES = 17
125_CD_LOCAL_HEADER_OFFSET = 18
126
127# General purpose bit flags
128# Zip Appnote: 4.4.4 general purpose bit flag: (2 bytes)
129_MASK_ENCRYPTED = 1 << 0
130# Bits 1 and 2 have different meanings depending on the compression used.
131_MASK_COMPRESS_OPTION_1 = 1 << 1
132# _MASK_COMPRESS_OPTION_2 = 1 << 2
133# _MASK_USE_DATA_DESCRIPTOR: If set, crc-32, compressed size and uncompressed
134# size are zero in the local header and the real values are written in the data
135# descriptor immediately following the compressed data.
136_MASK_USE_DATA_DESCRIPTOR = 1 << 3
137# Bit 4: Reserved for use with compression method 8, for enhanced deflating.
138# _MASK_RESERVED_BIT_4 = 1 << 4
139_MASK_COMPRESSED_PATCH = 1 << 5
140_MASK_STRONG_ENCRYPTION = 1 << 6
141# _MASK_UNUSED_BIT_7 = 1 << 7
142# _MASK_UNUSED_BIT_8 = 1 << 8
143# _MASK_UNUSED_BIT_9 = 1 << 9
144# _MASK_UNUSED_BIT_10 = 1 << 10
145_MASK_UTF_FILENAME = 1 << 11
146# Bit 12: Reserved by PKWARE for enhanced compression.
147# _MASK_RESERVED_BIT_12 = 1 << 12
148# _MASK_ENCRYPTED_CENTRAL_DIR = 1 << 13
149# Bit 14, 15: Reserved by PKWARE
150# _MASK_RESERVED_BIT_14 = 1 << 14
151# _MASK_RESERVED_BIT_15 = 1 << 15
152
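# Illustrative example (not part of the original module): these masks are
# tested against ZipInfo.flag_bits.  For instance, to check which members
# were stored with the UTF-8 filename flag ("archive.zip" is a hypothetical
# path):
#
#     with ZipFile("archive.zip") as zf:
#         for info in zf.infolist():
#             if info.flag_bits & _MASK_UTF_FILENAME:
#                 print(info.filename, "stored with UTF-8 flag")
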
153# The "local file header" structure, magic number, size, and indices
154# (section V.A in the format document)
155structFileHeader = "<4s2B4HL2L2H"
156stringFileHeader = b"PK\003\004"
157sizeFileHeader = struct.calcsize(structFileHeader)
158
159_FH_SIGNATURE = 0
160_FH_EXTRACT_VERSION = 1
161_FH_EXTRACT_SYSTEM = 2
162_FH_GENERAL_PURPOSE_FLAG_BITS = 3
163_FH_COMPRESSION_METHOD = 4
164_FH_LAST_MOD_TIME = 5
165_FH_LAST_MOD_DATE = 6
166_FH_CRC = 7
167_FH_COMPRESSED_SIZE = 8
168_FH_UNCOMPRESSED_SIZE = 9
169_FH_FILENAME_LENGTH = 10
170_FH_EXTRA_FIELD_LENGTH = 11
171
172# The "Zip64 end of central directory locator" structure, magic number, and size
173structEndArchive64Locator = "<4sLQL"
174stringEndArchive64Locator = b"PK\x06\x07"
175sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
176
177# The "Zip64 end of central directory" record, magic number, size, and indices
178# (section V.G in the format document)
179structEndArchive64 = "<4sQ2H2L4Q"
180stringEndArchive64 = b"PK\x06\x06"
181sizeEndCentDir64 = struct.calcsize(structEndArchive64)
182
183_CD64_SIGNATURE = 0
184_CD64_DIRECTORY_RECSIZE = 1
185_CD64_CREATE_VERSION = 2
186_CD64_EXTRACT_VERSION = 3
187_CD64_DISK_NUMBER = 4
188_CD64_DISK_NUMBER_START = 5
189_CD64_NUMBER_ENTRIES_THIS_DISK = 6
190_CD64_NUMBER_ENTRIES_TOTAL = 7
191_CD64_DIRECTORY_SIZE = 8
192_CD64_OFFSET_START_CENTDIR = 9
193
194_DD_SIGNATURE = 0x08074b50
195
196_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
197
198def _strip_extra(extra, xids):
199    # Remove Extra Fields with specified IDs.
200    unpack = _EXTRA_FIELD_STRUCT.unpack
201    modified = False
202    buffer = []
203    start = i = 0
204    while i + 4 <= len(extra):
205        xid, xlen = unpack(extra[i : i + 4])
206        j = i + 4 + xlen
207        if xid in xids:
208            if i != start:
209                buffer.append(extra[start : i])
210            start = j
211            modified = True
212        i = j
213    if not modified:
214        return extra
215    if start != len(extra):
216        buffer.append(extra[start:])
217    return b''.join(buffer)
218
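# Illustrative example (not part of the original module): _strip_extra drops
# whole extra-field blocks by their header ID.  Here a Zip64 block (ID
# 0x0001) is stripped while an unrelated block is kept; the payload bytes
# are made up for the example.
#
#     zip64_block = struct.pack('<HH', 0x0001, 8) + b'\x00' * 8
#     other_block = struct.pack('<HH', 0x6375, 4) + b'abcd'
#     _strip_extra(zip64_block + other_block, {0x0001}) == other_block
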
219def _check_zipfile(fp):
220    try:
221        if _EndRecData(fp):
222            return True         # file has correct magic number
223    except OSError:
224        pass
225    return False
226
227def is_zipfile(filename):
228    """Quickly see if a file is a ZIP file by checking the magic number.
229
230    The filename argument may also be an open file or file-like object.
231    """
232    result = False
233    try:
234        if hasattr(filename, "read"):
235            result = _check_zipfile(fp=filename)
236        else:
237            with open(filename, "rb") as fp:
238                result = _check_zipfile(fp)
239    except OSError:
240        pass
241    return result
242
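# Illustrative example (not part of the original module): is_zipfile only
# checks for the end-of-central-directory magic, so it is a cheap guard
# before opening an archive ("archive.zip" is a hypothetical path).
#
#     if is_zipfile("archive.zip"):
#         with ZipFile("archive.zip") as zf:
#             print(zf.namelist())
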
243def _EndRecData64(fpin, offset, endrec):
244    """
245    Read the ZIP64 end-of-archive records and use that to update endrec
246    """
247    try:
248        fpin.seek(offset - sizeEndCentDir64Locator, 2)
249    except OSError:
250        # If the seek fails, the file is not large enough to contain a ZIP64
251        # end-of-archive record, so just return the end record we were given.
252        return endrec
253
254    data = fpin.read(sizeEndCentDir64Locator)
255    if len(data) != sizeEndCentDir64Locator:
256        return endrec
257    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
258    if sig != stringEndArchive64Locator:
259        return endrec
260
261    if diskno != 0 or disks > 1:
262        raise BadZipFile("zipfiles that span multiple disks are not supported")
263
264    # Assume no 'zip64 extensible data'
265    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
266    data = fpin.read(sizeEndCentDir64)
267    if len(data) != sizeEndCentDir64:
268        return endrec
269    sig, sz, create_version, read_version, disk_num, disk_dir, \
270        dircount, dircount2, dirsize, diroffset = \
271        struct.unpack(structEndArchive64, data)
272    if sig != stringEndArchive64:
273        return endrec
274
275    # Update the original endrec using data from the ZIP64 record
276    endrec[_ECD_SIGNATURE] = sig
277    endrec[_ECD_DISK_NUMBER] = disk_num
278    endrec[_ECD_DISK_START] = disk_dir
279    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
280    endrec[_ECD_ENTRIES_TOTAL] = dircount2
281    endrec[_ECD_SIZE] = dirsize
282    endrec[_ECD_OFFSET] = diroffset
283    return endrec
284
285
286def _EndRecData(fpin):
287    """Return data from the "End of Central Directory" record, or None.
288
289    The data is a list of the nine items in the ZIP "End of central dir"
290    record followed by a tenth item, the file seek offset of this record."""
291
292    # Determine file size
293    fpin.seek(0, 2)
294    filesize = fpin.tell()
295
296    # Check to see if this is a ZIP file with no archive comment (the
297    # "end of central directory" structure should be the last item in the
298    # file if this is the case).
299    try:
300        fpin.seek(-sizeEndCentDir, 2)
301    except OSError:
302        return None
303    data = fpin.read()
304    if (len(data) == sizeEndCentDir and
305        data[0:4] == stringEndArchive and
306        data[-2:] == b"\000\000"):
307        # the signature is correct and there's no comment, unpack structure
308        endrec = struct.unpack(structEndArchive, data)
309        endrec = list(endrec)
310
311        # Append a blank comment and record start offset
312        endrec.append(b"")
313        endrec.append(filesize - sizeEndCentDir)
314
315        # Try to read the "Zip64 end of central directory" structure
316        return _EndRecData64(fpin, -sizeEndCentDir, endrec)
317
318    # Either this is not a ZIP file, or it is a ZIP file with an archive
319    # comment.  Search the end of the file for the "end of central directory"
320    # record signature. The comment is the last item in the ZIP file and may be
321    # up to 64K long.  It is assumed that the "end of central directory" magic
322    # number does not appear in the comment.
323    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
324    fpin.seek(maxCommentStart, 0)
325    data = fpin.read()
326    start = data.rfind(stringEndArchive)
327    if start >= 0:
328        # found the magic number; attempt to unpack and interpret
329        recData = data[start:start+sizeEndCentDir]
330        if len(recData) != sizeEndCentDir:
331            # Zip file is corrupted.
332            return None
333        endrec = list(struct.unpack(structEndArchive, recData))
334        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
335        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
336        endrec.append(comment)
337        endrec.append(maxCommentStart + start)
338
339        # Try to read the "Zip64 end of central directory" structure
340        return _EndRecData64(fpin, maxCommentStart + start - filesize,
341                             endrec)
342
343    # Unable to find a valid end of central directory structure
344    return None
345
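# Illustrative example (not part of the original module): _EndRecData is the
# low-level entry point used by ZipFile._RealGetContents; the returned list
# is indexed with the _ECD_* constants defined above ("archive.zip" is a
# hypothetical path).
#
#     with open("archive.zip", "rb") as fp:
#         endrec = _EndRecData(fp)
#         if endrec is not None:
#             print("central directory size:", endrec[_ECD_SIZE])
#             print("central directory offset:", endrec[_ECD_OFFSET])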
346
347class ZipInfo (object):
348    """Class with attributes describing each file in the ZIP archive."""
349
350    __slots__ = (
351        'orig_filename',
352        'filename',
353        'date_time',
354        'compress_type',
355        '_compresslevel',
356        'comment',
357        'extra',
358        'create_system',
359        'create_version',
360        'extract_version',
361        'reserved',
362        'flag_bits',
363        'volume',
364        'internal_attr',
365        'external_attr',
366        'header_offset',
367        'CRC',
368        'compress_size',
369        'file_size',
370        '_raw_time',
371        '_end_offset',
372    )
373
374    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
375        self.orig_filename = filename   # Original file name in archive
376
377        # Terminate the file name at the first null byte.  Null bytes in file
378        # names are used as tricks by viruses in archives.
379        null_byte = filename.find(chr(0))
380        if null_byte >= 0:
381            filename = filename[0:null_byte]
382        # This is used to ensure paths in generated ZIP files always use
383        # forward slashes as the directory separator, as required by the
384        # ZIP format specification.
385        if os.sep != "/" and os.sep in filename:
386            filename = filename.replace(os.sep, "/")
387
388        self.filename = filename        # Normalized file name
389        self.date_time = date_time      # year, month, day, hour, min, sec
390
391        if date_time[0] < 1980:
392            raise ValueError('ZIP does not support timestamps before 1980')
393
394        # Standard values:
395        self.compress_type = ZIP_STORED # Type of compression for the file
396        self._compresslevel = None      # Level for the compressor
397        self.comment = b""              # Comment for each file
398        self.extra = b""                # ZIP extra data
399        if sys.platform == 'win32':
400            self.create_system = 0          # System which created ZIP archive
401        else:
402            # Assume everything else is unix-y
403            self.create_system = 3          # System which created ZIP archive
404        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
405        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
406        self.reserved = 0               # Must be zero
407        self.flag_bits = 0              # ZIP flag bits
408        self.volume = 0                 # Volume number of file header
409        self.internal_attr = 0          # Internal attributes
410        self.external_attr = 0          # External file attributes
411        self.compress_size = 0          # Size of the compressed file
412        self.file_size = 0              # Size of the uncompressed file
413        self._end_offset = None         # Start of the next local header or central directory
414        # Other attributes are set by class ZipFile:
415        # header_offset         Byte offset to the file header
416        # CRC                   CRC-32 of the uncompressed file
417
418    def __repr__(self):
419        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
420        if self.compress_type != ZIP_STORED:
421            result.append(' compress_type=%s' %
422                          compressor_names.get(self.compress_type,
423                                               self.compress_type))
424        hi = self.external_attr >> 16
425        lo = self.external_attr & 0xFFFF
426        if hi:
427            result.append(' filemode=%r' % stat.filemode(hi))
428        if lo:
429            result.append(' external_attr=%#x' % lo)
430        isdir = self.is_dir()
431        if not isdir or self.file_size:
432            result.append(' file_size=%r' % self.file_size)
433        if ((not isdir or self.compress_size) and
434            (self.compress_type != ZIP_STORED or
435             self.file_size != self.compress_size)):
436            result.append(' compress_size=%r' % self.compress_size)
437        result.append('>')
438        return ''.join(result)
439
440    def FileHeader(self, zip64=None):
441        """Return the per-file header as a bytes object.
442
443        When the optional zip64 arg is None rather than a bool, we will
444        decide based upon the file_size and compress_size when they are
445        known, and default to False otherwise.
446        """
447        dt = self.date_time
448        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
449        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
450        if self.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
451            # Set these to zero because we write them after the file data
452            CRC = compress_size = file_size = 0
453        else:
454            CRC = self.CRC
455            compress_size = self.compress_size
456            file_size = self.file_size
457
458        extra = self.extra
459
460        min_version = 0
461        if zip64 is None:
462            # We always explicitly pass zip64 within this module.... This
463            # remains for anyone using ZipInfo.FileHeader as a public API.
464            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
465        if zip64:
466            fmt = '<HHQQ'
467            extra = extra + struct.pack(fmt,
468                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
469            file_size = 0xffffffff
470            compress_size = 0xffffffff
471            min_version = ZIP64_VERSION
472
473        if self.compress_type == ZIP_BZIP2:
474            min_version = max(BZIP2_VERSION, min_version)
475        elif self.compress_type == ZIP_LZMA:
476            min_version = max(LZMA_VERSION, min_version)
477
478        self.extract_version = max(min_version, self.extract_version)
479        self.create_version = max(min_version, self.create_version)
480        filename, flag_bits = self._encodeFilenameFlags()
481        header = struct.pack(structFileHeader, stringFileHeader,
482                             self.extract_version, self.reserved, flag_bits,
483                             self.compress_type, dostime, dosdate, CRC,
484                             compress_size, file_size,
485                             len(filename), len(extra))
486        return header + filename + extra
487
488    def _encodeFilenameFlags(self):
489        try:
490            return self.filename.encode('ascii'), self.flag_bits
491        except UnicodeEncodeError:
492            return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME
493
494    def _decodeExtra(self):
495        # Try to decode the extra field.
496        extra = self.extra
497        unpack = struct.unpack
498        while len(extra) >= 4:
499            tp, ln = unpack('<HH', extra[:4])
500            if ln+4 > len(extra):
501                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
502            if tp == 0x0001:
503                data = extra[4:ln+4]
504                # ZIP64 extension (large files and/or large archives)
505                try:
506                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
507                        field = "File size"
508                        self.file_size, = unpack('<Q', data[:8])
509                        data = data[8:]
510                    if self.compress_size == 0xFFFF_FFFF:
511                        field = "Compress size"
512                        self.compress_size, = unpack('<Q', data[:8])
513                        data = data[8:]
514                    if self.header_offset == 0xFFFF_FFFF:
515                        field = "Header offset"
516                        self.header_offset, = unpack('<Q', data[:8])
517                except struct.error:
518                    raise BadZipFile(f"Corrupt zip64 extra field. "
519                                     f"{field} not found.") from None
520
521            extra = extra[ln+4:]
522
523    @classmethod
524    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
525        """Construct an appropriate ZipInfo for a file on the filesystem.
526
527        filename should be the path to a file or directory on the filesystem.
528
529        arcname is the name which it will have within the archive (by default,
530        this will be the same as filename, but without a drive letter and with
531        leading path separators removed).
532        """
533        if isinstance(filename, os.PathLike):
534            filename = os.fspath(filename)
535        st = os.stat(filename)
536        isdir = stat.S_ISDIR(st.st_mode)
537        mtime = time.localtime(st.st_mtime)
538        date_time = mtime[0:6]
539        if not strict_timestamps and date_time[0] < 1980:
540            date_time = (1980, 1, 1, 0, 0, 0)
541        elif not strict_timestamps and date_time[0] > 2107:
542            date_time = (2107, 12, 31, 23, 59, 59)
543        # Create ZipInfo instance to store file information
544        if arcname is None:
545            arcname = filename
546        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
547        while arcname[0] in (os.sep, os.altsep):
548            arcname = arcname[1:]
549        if isdir:
550            arcname += '/'
551        zinfo = cls(arcname, date_time)
552        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
553        if isdir:
554            zinfo.file_size = 0
555            zinfo.external_attr |= 0x10  # MS-DOS directory flag
556        else:
557            zinfo.file_size = st.st_size
558
559        return zinfo
560
561    def is_dir(self):
562        """Return True if this archive member is a directory."""
563        return self.filename[-1] == '/'
564
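# Illustrative example (not part of the original module): ZipInfo.from_file
# captures a file's metadata (mode bits, timestamp, size) so it can be
# written under a different archive name; "notes.txt" and "archive.zip" are
# hypothetical names.
#
#     zinfo = ZipInfo.from_file("notes.txt", arcname="docs/notes.txt")
#     with ZipFile("archive.zip", "w") as zf:
#         with zf.open(zinfo, "w") as dest, open("notes.txt", "rb") as src:
#             shutil.copyfileobj(src, dest)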
565
566# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
567# internal keys. We noticed that a direct implementation is faster than
568# relying on binascii.crc32().
569
570_crctable = None
571def _gen_crc(crc):
572    for j in range(8):
573        if crc & 1:
574            crc = (crc >> 1) ^ 0xEDB88320
575        else:
576            crc >>= 1
577    return crc
578
579# ZIP supports a password-based form of encryption. Even though known
580# plaintext attacks have been found against it, it is still useful
581# to be able to get data out of such a file.
582#
583# Usage:
584#     zd = _ZipDecrypter(mypwd)
585#     plain_bytes = zd(cypher_bytes)
586
587def _ZipDecrypter(pwd):
588    key0 = 305419896
589    key1 = 591751049
590    key2 = 878082192
591
592    global _crctable
593    if _crctable is None:
594        _crctable = list(map(_gen_crc, range(256)))
595    crctable = _crctable
596
597    def crc32(ch, crc):
598        """Compute the CRC32 primitive on one byte."""
599        return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
600
601    def update_keys(c):
602        nonlocal key0, key1, key2
603        key0 = crc32(c, key0)
604        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
605        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
606        key2 = crc32(key1 >> 24, key2)
607
608    for p in pwd:
609        update_keys(p)
610
611    def decrypter(data):
612        """Decrypt a bytes object."""
613        result = bytearray()
614        append = result.append
615        for c in data:
616            k = key2 | 2
617            c ^= ((k * (k^1)) >> 8) & 0xFF
618            update_keys(c)
619            append(c)
620        return bytes(result)
621
622    return decrypter
623
624
625class LZMACompressor:
626
627    def __init__(self):
628        self._comp = None
629
630    def _init(self):
631        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
632        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
633            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
634        ])
635        return struct.pack('<BBH', 9, 4, len(props)) + props
636
637    def compress(self, data):
638        if self._comp is None:
639            return self._init() + self._comp.compress(data)
640        return self._comp.compress(data)
641
642    def flush(self):
643        if self._comp is None:
644            return self._init() + self._comp.flush()
645        return self._comp.flush()
646
647
648class LZMADecompressor:
649
650    def __init__(self):
651        self._decomp = None
652        self._unconsumed = b''
653        self.eof = False
654
655    def decompress(self, data):
656        if self._decomp is None:
657            self._unconsumed += data
658            if len(self._unconsumed) <= 4:
659                return b''
660            psize, = struct.unpack('<H', self._unconsumed[2:4])
661            if len(self._unconsumed) <= 4 + psize:
662                return b''
663
664            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
665                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
666                                               self._unconsumed[4:4 + psize])
667            ])
668            data = self._unconsumed[4 + psize:]
669            del self._unconsumed
670
671        result = self._decomp.decompress(data)
672        self.eof = self._decomp.eof
673        return result
674
675
676compressor_names = {
677    0: 'store',
678    1: 'shrink',
679    2: 'reduce',
680    3: 'reduce',
681    4: 'reduce',
682    5: 'reduce',
683    6: 'implode',
684    7: 'tokenize',
685    8: 'deflate',
686    9: 'deflate64',
687    10: 'implode',
688    12: 'bzip2',
689    14: 'lzma',
690    18: 'terse',
691    19: 'lz77',
692    97: 'wavpack',
693    98: 'ppmd',
694}
695
696def _check_compression(compression):
697    if compression == ZIP_STORED:
698        pass
699    elif compression == ZIP_DEFLATED:
700        if not zlib:
701            raise RuntimeError(
702                "Compression requires the (missing) zlib module")
703    elif compression == ZIP_BZIP2:
704        if not bz2:
705            raise RuntimeError(
706                "Compression requires the (missing) bz2 module")
707    elif compression == ZIP_LZMA:
708        if not lzma:
709            raise RuntimeError(
710                "Compression requires the (missing) lzma module")
711    else:
712        raise NotImplementedError("That compression method is not supported")
713
714
715def _get_compressor(compress_type, compresslevel=None):
716    if compress_type == ZIP_DEFLATED:
717        if compresslevel is not None:
718            return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
719        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
720    elif compress_type == ZIP_BZIP2:
721        if compresslevel is not None:
722            return bz2.BZ2Compressor(compresslevel)
723        return bz2.BZ2Compressor()
724    # compresslevel is ignored for ZIP_LZMA
725    elif compress_type == ZIP_LZMA:
726        return LZMACompressor()
727    else:
728        return None
729
730
731def _get_decompressor(compress_type):
732    _check_compression(compress_type)
733    if compress_type == ZIP_STORED:
734        return None
735    elif compress_type == ZIP_DEFLATED:
736        return zlib.decompressobj(-15)
737    elif compress_type == ZIP_BZIP2:
738        return bz2.BZ2Decompressor()
739    elif compress_type == ZIP_LZMA:
740        return LZMADecompressor()
741    else:
742        descr = compressor_names.get(compress_type)
743        if descr:
744            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
745        else:
746            raise NotImplementedError("compression type %d" % (compress_type,))
747
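# Illustrative example (not part of the original module): the compressor and
# decompressor objects returned here work on raw streams (e.g. raw deflate
# with a -15 window size), so a round trip looks like this when zlib is
# available:
#
#     comp = _get_compressor(ZIP_DEFLATED)
#     payload = comp.compress(b"hello zip") + comp.flush()
#     decomp = _get_decompressor(ZIP_DEFLATED)
#     decomp.decompress(payload) == b"hello zip"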
748
749class _SharedFile:
750    def __init__(self, file, pos, close, lock, writing):
751        self._file = file
752        self._pos = pos
753        self._close = close
754        self._lock = lock
755        self._writing = writing
756        self.seekable = file.seekable
757
758    def tell(self):
759        return self._pos
760
761    def seek(self, offset, whence=0):
762        with self._lock:
763            if self._writing():
764                raise ValueError("Can't reposition in the ZIP file while "
765                        "there is an open writing handle on it. "
766                        "Close the writing handle before trying to read.")
767            self._file.seek(offset, whence)
768            self._pos = self._file.tell()
769            return self._pos
770
771    def read(self, n=-1):
772        with self._lock:
773            if self._writing():
774                raise ValueError("Can't read from the ZIP file while there "
775                        "is an open writing handle on it. "
776                        "Close the writing handle before trying to read.")
777            self._file.seek(self._pos)
778            data = self._file.read(n)
779            self._pos = self._file.tell()
780            return data
781
782    def close(self):
783        if self._file is not None:
784            fileobj = self._file
785            self._file = None
786            self._close(fileobj)
787
788# Provide the tell method for unseekable streams
789class _Tellable:
790    def __init__(self, fp):
791        self.fp = fp
792        self.offset = 0
793
794    def write(self, data):
795        n = self.fp.write(data)
796        self.offset += n
797        return n
798
799    def tell(self):
800        return self.offset
801
802    def flush(self):
803        self.fp.flush()
804
805    def close(self):
806        self.fp.close()
807
808
809class ZipExtFile(io.BufferedIOBase):
810    """File-like object for reading an archive member.
811       Is returned by ZipFile.open().
812    """
813
814    # Max size supported by decompressor.
815    MAX_N = (1 << 31) - 1
816
817    # Read from compressed files in 4k blocks.
818    MIN_READ_SIZE = 4096
819
820    # Chunk size to read during seek
821    MAX_SEEK_READ = 1 << 24
822
823    def __init__(self, fileobj, mode, zipinfo, pwd=None,
824                 close_fileobj=False):
825        self._fileobj = fileobj
826        self._pwd = pwd
827        self._close_fileobj = close_fileobj
828
829        self._compress_type = zipinfo.compress_type
830        self._compress_left = zipinfo.compress_size
831        self._left = zipinfo.file_size
832
833        self._decompressor = _get_decompressor(self._compress_type)
834
835        self._eof = False
836        self._readbuffer = b''
837        self._offset = 0
838
839        self.newlines = None
840
841        self.mode = mode
842        self.name = zipinfo.filename
843
844        if hasattr(zipinfo, 'CRC'):
845            self._expected_crc = zipinfo.CRC
846            self._running_crc = crc32(b'')
847        else:
848            self._expected_crc = None
849
850        self._seekable = False
851        try:
852            if fileobj.seekable():
853                self._orig_compress_start = fileobj.tell()
854                self._orig_compress_size = zipinfo.compress_size
855                self._orig_file_size = zipinfo.file_size
856                self._orig_start_crc = self._running_crc
857                self._seekable = True
858        except AttributeError:
859            pass
860
861        self._decrypter = None
862        if pwd:
863            if zipinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
864                # compare against the file type from extended local headers
865                check_byte = (zipinfo._raw_time >> 8) & 0xff
866            else:
867                # compare against the CRC otherwise
868                check_byte = (zipinfo.CRC >> 24) & 0xff
869            h = self._init_decrypter()
870            if h != check_byte:
871                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)
872
873
874    def _init_decrypter(self):
875        self._decrypter = _ZipDecrypter(self._pwd)
876        # The first 12 bytes in the cypher stream are an encryption header
877        #  used to strengthen the algorithm. The first 11 bytes are
878        #  completely random, while the 12th contains the MSB of the CRC,
879        #  or the MSB of the file time depending on the header type
880        #  and is used to check the correctness of the password.
881        header = self._fileobj.read(12)
882        self._compress_left -= 12
883        return self._decrypter(header)[11]
884
885    def __repr__(self):
886        result = ['<%s.%s' % (self.__class__.__module__,
887                              self.__class__.__qualname__)]
888        if not self.closed:
889            result.append(' name=%r mode=%r' % (self.name, self.mode))
890            if self._compress_type != ZIP_STORED:
891                result.append(' compress_type=%s' %
892                              compressor_names.get(self._compress_type,
893                                                   self._compress_type))
894        else:
895            result.append(' [closed]')
896        result.append('>')
897        return ''.join(result)
898
899    def readline(self, limit=-1):
900        """Read and return a line from the stream.
901
902        If limit is specified, at most limit bytes will be read.
903        """
904
905        if limit < 0:
906            # Shortcut common case - newline found in buffer.
907            i = self._readbuffer.find(b'\n', self._offset) + 1
908            if i > 0:
909                line = self._readbuffer[self._offset: i]
910                self._offset = i
911                return line
912
913        return io.BufferedIOBase.readline(self, limit)
914
915    def peek(self, n=1):
916        """Returns buffered bytes without advancing the position."""
917        if n > len(self._readbuffer) - self._offset:
918            chunk = self.read(n)
919            if len(chunk) > self._offset:
920                self._readbuffer = chunk + self._readbuffer[self._offset:]
921                self._offset = 0
922            else:
923                self._offset -= len(chunk)
924
925        # Return up to 512 bytes to reduce allocation overhead for tight loops.
926        return self._readbuffer[self._offset: self._offset + 512]
927
928    def readable(self):
929        if self.closed:
930            raise ValueError("I/O operation on closed file.")
931        return True
932
933    def read(self, n=-1):
934        """Read and return up to n bytes.
935        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
936        """
937        if self.closed:
938            raise ValueError("read from closed file.")
939        if n is None or n < 0:
940            buf = self._readbuffer[self._offset:]
941            self._readbuffer = b''
942            self._offset = 0
943            while not self._eof:
944                buf += self._read1(self.MAX_N)
945            return buf
946
947        end = n + self._offset
948        if end < len(self._readbuffer):
949            buf = self._readbuffer[self._offset:end]
950            self._offset = end
951            return buf
952
953        n = end - len(self._readbuffer)
954        buf = self._readbuffer[self._offset:]
955        self._readbuffer = b''
956        self._offset = 0
957        while n > 0 and not self._eof:
958            data = self._read1(n)
959            if n < len(data):
960                self._readbuffer = data
961                self._offset = n
962                buf += data[:n]
963                break
964            buf += data
965            n -= len(data)
966        return buf
967
968    def _update_crc(self, newdata):
969        # Update the CRC using the given data.
970        if self._expected_crc is None:
971            # No need to compute the CRC if we don't have a reference value
972            return
973        self._running_crc = crc32(newdata, self._running_crc)
974        # Check the CRC if we're at the end of the file
975        if self._eof and self._running_crc != self._expected_crc:
976            raise BadZipFile("Bad CRC-32 for file %r" % self.name)
977
978    def read1(self, n):
979        """Read up to n bytes with at most one read() system call."""
980
981        if n is None or n < 0:
982            buf = self._readbuffer[self._offset:]
983            self._readbuffer = b''
984            self._offset = 0
985            while not self._eof:
986                data = self._read1(self.MAX_N)
987                if data:
988                    buf += data
989                    break
990            return buf
991
992        end = n + self._offset
993        if end < len(self._readbuffer):
994            buf = self._readbuffer[self._offset:end]
995            self._offset = end
996            return buf
997
998        n = end - len(self._readbuffer)
999        buf = self._readbuffer[self._offset:]
1000        self._readbuffer = b''
1001        self._offset = 0
1002        if n > 0:
1003            while not self._eof:
1004                data = self._read1(n)
1005                if n < len(data):
1006                    self._readbuffer = data
1007                    self._offset = n
1008                    buf += data[:n]
1009                    break
1010                if data:
1011                    buf += data
1012                    break
1013        return buf
1014
1015    def _read1(self, n):
1016        # Read up to n compressed bytes with at most one read() system call,
1017        # decrypt and decompress them.
1018        if self._eof or n <= 0:
1019            return b''
1020
1021        # Read from file.
1022        if self._compress_type == ZIP_DEFLATED:
1023            ## Handle unconsumed data.
1024            data = self._decompressor.unconsumed_tail
1025            if n > len(data):
1026                data += self._read2(n - len(data))
1027        else:
1028            data = self._read2(n)
1029
1030        if self._compress_type == ZIP_STORED:
1031            self._eof = self._compress_left <= 0
1032        elif self._compress_type == ZIP_DEFLATED:
1033            n = max(n, self.MIN_READ_SIZE)
1034            data = self._decompressor.decompress(data, n)
1035            self._eof = (self._decompressor.eof or
1036                         self._compress_left <= 0 and
1037                         not self._decompressor.unconsumed_tail)
1038            if self._eof:
1039                data += self._decompressor.flush()
1040        else:
1041            data = self._decompressor.decompress(data)
1042            self._eof = self._decompressor.eof or self._compress_left <= 0
1043
1044        data = data[:self._left]
1045        self._left -= len(data)
1046        if self._left <= 0:
1047            self._eof = True
1048        self._update_crc(data)
1049        return data
1050
1051    def _read2(self, n):
1052        if self._compress_left <= 0:
1053            return b''
1054
1055        n = max(n, self.MIN_READ_SIZE)
1056        n = min(n, self._compress_left)
1057
1058        data = self._fileobj.read(n)
1059        self._compress_left -= len(data)
1060        if not data:
1061            raise EOFError
1062
1063        if self._decrypter is not None:
1064            data = self._decrypter(data)
1065        return data
1066
1067    def close(self):
1068        try:
1069            if self._close_fileobj:
1070                self._fileobj.close()
1071        finally:
1072            super().close()
1073
1074    def seekable(self):
1075        if self.closed:
1076            raise ValueError("I/O operation on closed file.")
1077        return self._seekable
1078
1079    def seek(self, offset, whence=0):
1080        if self.closed:
1081            raise ValueError("seek on closed file.")
1082        if not self._seekable:
1083            raise io.UnsupportedOperation("underlying stream is not seekable")
1084        curr_pos = self.tell()
1085        if whence == 0: # Seek from start of file
1086            new_pos = offset
1087        elif whence == 1: # Seek from current position
1088            new_pos = curr_pos + offset
1089        elif whence == 2: # Seek from EOF
1090            new_pos = self._orig_file_size + offset
1091        else:
1092            raise ValueError("whence must be os.SEEK_SET (0), "
1093                             "os.SEEK_CUR (1), or os.SEEK_END (2)")
1094
1095        if new_pos > self._orig_file_size:
1096            new_pos = self._orig_file_size
1097
1098        if new_pos < 0:
1099            new_pos = 0
1100
1101        read_offset = new_pos - curr_pos
1102        buff_offset = read_offset + self._offset
1103
1104        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
1105            # Just move the _offset index if the new position is in the _readbuffer
1106            self._offset = buff_offset
1107            read_offset = 0
1108        elif read_offset < 0:
1109            # Position is before the current position. Reset the ZipExtFile
1110            self._fileobj.seek(self._orig_compress_start)
1111            self._running_crc = self._orig_start_crc
1112            self._compress_left = self._orig_compress_size
1113            self._left = self._orig_file_size
1114            self._readbuffer = b''
1115            self._offset = 0
1116            self._decompressor = _get_decompressor(self._compress_type)
1117            self._eof = False
1118            read_offset = new_pos
1119            if self._decrypter is not None:
1120                self._init_decrypter()
1121
1122        while read_offset > 0:
1123            read_len = min(self.MAX_SEEK_READ, read_offset)
1124            self.read(read_len)
1125            read_offset -= read_len
1126
1127        return self.tell()
1128
1129    def tell(self):
1130        if self.closed:
1131            raise ValueError("tell on closed file.")
1132        if not self._seekable:
1133            raise io.UnsupportedOperation("underlying stream is not seekable")
1134        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
1135        return filepos
1136
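# Illustrative example (not part of the original module): because ZipExtFile
# supports seek() and tell() when the underlying file is seekable, a member
# can be sampled at an offset or re-read ("archive.zip" and "member.txt" are
# hypothetical names).
#
#     with ZipFile("archive.zip") as zf:
#         with zf.open("member.txt") as f:
#             f.seek(100)    # decompresses and discards bytes up to offset 100
#             chunk = f.read(16)
#             f.seek(0)      # rewinding resets the decompressor and rereads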
1137
1138class _ZipWriteFile(io.BufferedIOBase):
1139    def __init__(self, zf, zinfo, zip64):
1140        self._zinfo = zinfo
1141        self._zip64 = zip64
1142        self._zipfile = zf
1143        self._compressor = _get_compressor(zinfo.compress_type,
1144                                           zinfo._compresslevel)
1145        self._file_size = 0
1146        self._compress_size = 0
1147        self._crc = 0
1148
1149    @property
1150    def _fileobj(self):
1151        return self._zipfile.fp
1152
1153    def writable(self):
1154        return True
1155
1156    def write(self, data):
1157        if self.closed:
1158            raise ValueError('I/O operation on closed file.')
1159
1160        # Accept any data that supports the buffer protocol
1161        if isinstance(data, (bytes, bytearray)):
1162            nbytes = len(data)
1163        else:
1164            data = memoryview(data)
1165            nbytes = data.nbytes
1166        self._file_size += nbytes
1167
1168        self._crc = crc32(data, self._crc)
1169        if self._compressor:
1170            data = self._compressor.compress(data)
1171            self._compress_size += len(data)
1172        self._fileobj.write(data)
1173        return nbytes
1174
1175    def close(self):
1176        if self.closed:
1177            return
1178        try:
1179            super().close()
1180            # Flush any data from the compressor, and update header info
1181            if self._compressor:
1182                buf = self._compressor.flush()
1183                self._compress_size += len(buf)
1184                self._fileobj.write(buf)
1185                self._zinfo.compress_size = self._compress_size
1186            else:
1187                self._zinfo.compress_size = self._file_size
1188            self._zinfo.CRC = self._crc
1189            self._zinfo.file_size = self._file_size
1190
1191            if not self._zip64:
1192                if self._file_size > ZIP64_LIMIT:
1193                    raise RuntimeError("File size too large, try using force_zip64")
1194                if self._compress_size > ZIP64_LIMIT:
1195                    raise RuntimeError("Compressed size too large, try using force_zip64")
1196
1197            # Write updated header info
1198            if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
1199                # Write CRC and file sizes after the file data
1200                fmt = '<LLQQ' if self._zip64 else '<LLLL'
1201                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
1202                    self._zinfo.compress_size, self._zinfo.file_size))
1203                self._zipfile.start_dir = self._fileobj.tell()
1204            else:
1205                # Seek backwards and write file header (which will now include
1206                # correct CRC and file sizes)
1207
1208                # Preserve current position in file
1209                self._zipfile.start_dir = self._fileobj.tell()
1210                self._fileobj.seek(self._zinfo.header_offset)
1211                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1212                self._fileobj.seek(self._zipfile.start_dir)
1213
1214            # Successfully written: Add file to our caches
1215            self._zipfile.filelist.append(self._zinfo)
1216            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1217        finally:
1218            self._zipfile._writing = False
1219
1220
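# Illustrative example (not part of the original module): _ZipWriteFile
# instances are returned by ZipFile.open(..., mode="w") and stream data
# straight into the archive.  Pass force_zip64=True if the member may exceed
# 2 GiB and its size is not known up front.  "archive.zip", "big.bin" and
# 'src' below are hypothetical; 'src' stands for any readable binary stream.
#
#     with ZipFile("archive.zip", "w") as zf:
#         with zf.open("big.bin", mode="w", force_zip64=True) as dest:
#             for chunk in iter(lambda: src.read(65536), b""):
#                 dest.write(chunk)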
1221
1222class ZipFile:
1223    """ Class with methods to open, read, write, close, list zip files.
1224
1225    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1226                compresslevel=None)
1227
1228    file: Either the path to the file, or a file-like object.
1229          If it is a path, the file will be opened and closed by ZipFile.
1230    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1231          or append 'a'.
1232    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1233                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
1234    allowZip64: if True ZipFile will create files with ZIP64 extensions when
1235                needed, otherwise it will raise an exception when this would
1236                be necessary.
1237    compresslevel: None (default for the given compression type) or an integer
1238                   specifying the level to pass to the compressor.
1239                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1240                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
1241                   When using ZIP_BZIP2 integers 1 through 9 are accepted.
1242
1243    """
1244
1245    fp = None                   # Set here since __del__ checks it
1246    _windows_illegal_name_trans_table = None
1247
1248    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
1249                 compresslevel=None, *, strict_timestamps=True, metadata_encoding=None):
1250        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
1251        or append 'a'."""
1252        if mode not in ('r', 'w', 'x', 'a'):
1253            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
1254
1255        _check_compression(compression)
1256
1257        self._allowZip64 = allowZip64
1258        self._didModify = False
1259        self.debug = 0  # Level of printing: 0 through 3
1260        self.NameToInfo = {}    # Find file info given name
1261        self.filelist = []      # List of ZipInfo instances for archive
1262        self.compression = compression  # Method of compression
1263        self.compresslevel = compresslevel
1264        self.mode = mode
1265        self.pwd = None
1266        self._comment = b''
1267        self._strict_timestamps = strict_timestamps
1268        self.metadata_encoding = metadata_encoding
1269
1270        # Check that we don't try to write with nonconforming codecs
1271        if self.metadata_encoding and mode != 'r':
1272            raise ValueError(
1273                "metadata_encoding is only supported for reading files")
1274
1275        # Check if we were passed a file-like object
1276        if isinstance(file, os.PathLike):
1277            file = os.fspath(file)
1278        if isinstance(file, str):
1279            # No, it's a filename
1280            self._filePassed = 0
1281            self.filename = file
1282            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1283                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
1284            filemode = modeDict[mode]
1285            while True:
1286                try:
1287                    self.fp = io.open(file, filemode)
1288                except OSError:
1289                    if filemode in modeDict:
1290                        filemode = modeDict[filemode]
1291                        continue
1292                    raise
1293                break
1294        else:
1295            self._filePassed = 1
1296            self.fp = file
1297            self.filename = getattr(file, 'name', None)
1298        self._fileRefCnt = 1
1299        self._lock = threading.RLock()
1300        self._seekable = True
1301        self._writing = False
1302
1303        try:
1304            if mode == 'r':
1305                self._RealGetContents()
1306            elif mode in ('w', 'x'):
1307                # set the modified flag so central directory gets written
1308                # even if no files are added to the archive
1309                self._didModify = True
1310                try:
1311                    self.start_dir = self.fp.tell()
1312                except (AttributeError, OSError):
1313                    self.fp = _Tellable(self.fp)
1314                    self.start_dir = 0
1315                    self._seekable = False
1316                else:
1317                    # Some file-like objects can provide tell() but not seek()
1318                    try:
1319                        self.fp.seek(self.start_dir)
1320                    except (AttributeError, OSError):
1321                        self._seekable = False
1322            elif mode == 'a':
1323                try:
1324                    # See if file is a zip file
1325                    self._RealGetContents()
1326                    # seek to start of directory and overwrite
1327                    self.fp.seek(self.start_dir)
1328                except BadZipFile:
1329                    # file is not a zip file, just append
1330                    self.fp.seek(0, 2)
1331
1332                    # set the modified flag so central directory gets written
1333                    # even if no files are added to the archive
1334                    self._didModify = True
1335                    self.start_dir = self.fp.tell()
1336            else:
1337                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
1338        except:
1339            fp = self.fp
1340            self.fp = None
1341            self._fpclose(fp)
1342            raise
1343
1344    def __enter__(self):
1345        return self
1346
1347    def __exit__(self, type, value, traceback):
1348        self.close()
1349
1350    def __repr__(self):
1351        result = ['<%s.%s' % (self.__class__.__module__,
1352                              self.__class__.__qualname__)]
1353        if self.fp is not None:
1354            if self._filePassed:
1355                result.append(' file=%r' % self.fp)
1356            elif self.filename is not None:
1357                result.append(' filename=%r' % self.filename)
1358            result.append(' mode=%r' % self.mode)
1359        else:
1360            result.append(' [closed]')
1361        result.append('>')
1362        return ''.join(result)
1363
1364    def _RealGetContents(self):
1365        """Read in the table of contents for the ZIP file."""
1366        fp = self.fp
1367        try:
1368            endrec = _EndRecData(fp)
1369        except OSError:
1370            raise BadZipFile("File is not a zip file")
1371        if not endrec:
1372            raise BadZipFile("File is not a zip file")
1373        if self.debug > 1:
1374            print(endrec)
1375        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
1376        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
1377        self._comment = endrec[_ECD_COMMENT]    # archive comment
1378
1379        # "concat" is zero, unless zip was concatenated to another file
1380        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
1381        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1382            # If Zip64 extension structures are present, account for them
1383            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
1384
1385        if self.debug > 2:
1386            inferred = concat + offset_cd
1387            print("given, inferred, offset", offset_cd, inferred, concat)
1388        # self.start_dir:  Position of start of central directory
1389        self.start_dir = offset_cd + concat
1390        if self.start_dir < 0:
1391            raise BadZipFile("Bad offset for central directory")
1392        fp.seek(self.start_dir, 0)
1393        data = fp.read(size_cd)
1394        fp = io.BytesIO(data)
1395        total = 0
1396        while total < size_cd:
1397            centdir = fp.read(sizeCentralDir)
1398            if len(centdir) != sizeCentralDir:
1399                raise BadZipFile("Truncated central directory")
1400            centdir = struct.unpack(structCentralDir, centdir)
1401            if centdir[_CD_SIGNATURE] != stringCentralDir:
1402                raise BadZipFile("Bad magic number for central directory")
1403            if self.debug > 2:
1404                print(centdir)
1405            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
1406            flags = centdir[_CD_FLAG_BITS]
1407            if flags & _MASK_UTF_FILENAME:
1408                # UTF-8 file names extension
1409                filename = filename.decode('utf-8')
1410            else:
1411                # Historical ZIP filename encoding
1412                filename = filename.decode(self.metadata_encoding or 'cp437')
1413            # Create ZipInfo instance to store file information
1414            x = ZipInfo(filename)
1415            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1416            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
1417            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
1418            (x.create_version, x.create_system, x.extract_version, x.reserved,
1419             x.flag_bits, x.compress_type, t, d,
1420             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
1421            if x.extract_version > MAX_EXTRACT_VERSION:
1422                raise NotImplementedError("zip file version %.1f" %
1423                                          (x.extract_version / 10))
1424            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1425            # Convert date/time code to (year, month, day, hour, min, sec)
1426            x._raw_time = t
1427            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
1428                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
1429
1430            x._decodeExtra()
1431            x.header_offset = x.header_offset + concat
1432            self.filelist.append(x)
1433            self.NameToInfo[x.filename] = x
1434
1435            # update total bytes read from central directory
1436            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1437                     + centdir[_CD_EXTRA_FIELD_LENGTH]
1438                     + centdir[_CD_COMMENT_LENGTH])
1439
1440            if self.debug > 2:
1441                print("total", total)
1442
1443        end_offset = self.start_dir
1444        for zinfo in sorted(self.filelist,
1445                            key=lambda zinfo: zinfo.header_offset,
1446                            reverse=True):
1447            zinfo._end_offset = end_offset
1448            end_offset = zinfo.header_offset
1449
1450    def namelist(self):
1451        """Return a list of file names in the archive."""
1452        return [data.filename for data in self.filelist]
1453
1454    def infolist(self):
1455        """Return a list of class ZipInfo instances for files in the
1456        archive."""
1457        return self.filelist
1458
1459    def printdir(self, file=None):
1460        """Print a table of contents for the zip file."""
1461        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
1462              file=file)
1463        for zinfo in self.filelist:
1464            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
1465            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1466                  file=file)
1467
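    # Illustrative example (not part of the original module): namelist(),
    # infolist() and printdir() are the usual ways to inspect an archive's
    # contents ("archive.zip" is a hypothetical path).
    #
    #     with ZipFile("archive.zip") as zf:
    #         for info in zf.infolist():
    #             print(info.filename, info.file_size, info.compress_size)
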
1468    def testzip(self):
1469        """Read all the files and check the CRC."""
1470        chunk_size = 2 ** 20
1471        for zinfo in self.filelist:
1472            try:
1473                # Read by chunks, to avoid an OverflowError or a
1474                # MemoryError with very large embedded files.
1475                with self.open(zinfo.filename, "r") as f:
1476                    while f.read(chunk_size):     # Check CRC-32
1477                        pass
1478            except BadZipFile:
1479                return zinfo.filename
1480
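    # Example (minimal sketch, assuming a hypothetical 'archive.zip' on disk):
    # testzip() returns the name of the first member whose CRC check fails,
    # or None when every member reads back cleanly.
    #
    #     with ZipFile('archive.zip') as zf:
    #         first_bad = zf.testzip()
    #         if first_bad is not None:
    #             print('CRC error in member:', first_bad)
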
1481    def getinfo(self, name):
1482        """Return the instance of ZipInfo given 'name'."""
1483        info = self.NameToInfo.get(name)
1484        if info is None:
1485            raise KeyError(
1486                'There is no item named %r in the archive' % name)
1487
1488        return info
1489
1490    def setpassword(self, pwd):
1491        """Set default password for encrypted files."""
1492        if pwd and not isinstance(pwd, bytes):
1493            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1494        if pwd:
1495            self.pwd = pwd
1496        else:
1497            self.pwd = None
1498
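    # Example (minimal sketch; 'secret.zip', 'hidden.txt' and b'hunter2' are
    # hypothetical): a default password set here is used by read()/open()/
    # extract() whenever an encrypted member is requested without an explicit
    # pwd argument.
    #
    #     with ZipFile('secret.zip') as zf:
    #         zf.setpassword(b'hunter2')
    #         data = zf.read('hidden.txt')
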
1499    @property
1500    def comment(self):
1501        """The comment text associated with the ZIP file."""
1502        return self._comment
1503
1504    @comment.setter
1505    def comment(self, comment):
1506        if not isinstance(comment, bytes):
1507            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
1508        # check for valid comment length
1509        if len(comment) > ZIP_MAX_COMMENT:
1510            import warnings
1511            warnings.warn('Archive comment is too long; truncating to %d bytes'
1512                          % ZIP_MAX_COMMENT, stacklevel=2)
1513            comment = comment[:ZIP_MAX_COMMENT]
1514        self._comment = comment
1515        self._didModify = True
1516
1517    def read(self, name, pwd=None):
1518        """Return file bytes for name."""
1519        with self.open(name, "r", pwd) as fp:
1520            return fp.read()
1521
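    # Example (minimal sketch, hypothetical member names): read() buffers the
    # whole member in memory; for large members, stream through open() instead.
    #
    #     with ZipFile('archive.zip') as zf:
    #         small = zf.read('config.json')            # bytes, all at once
    #         with zf.open('big.bin') as member:        # file-like, chunked
    #             for chunk in iter(lambda: member.read(1 << 16), b''):
    #                 pass  # process chunk
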
1522    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
1523        """Return file-like object for 'name'.
1524
1525        name is a string for the file name within the ZIP file, or a ZipInfo
1526        object.
1527
1528        mode should be 'r' to read a file already in the ZIP file, or 'w' to
1529        write to a file newly added to the archive.
1530
1531        pwd is the password to decrypt files (only used for reading).
1532
1533        When writing, if the file size is not known in advance but may exceed
1534        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
1535        files.  If the size is known in advance, it is best to pass a ZipInfo
1536        instance for name, with zinfo.file_size set.
1537        """
1538        if mode not in {"r", "w"}:
1539            raise ValueError('open() requires mode "r" or "w"')
1540        if pwd and (mode == "w"):
1541            raise ValueError("pwd is only supported for reading files")
1542        if not self.fp:
1543            raise ValueError(
1544                "Attempt to use ZIP archive that was already closed")
1545
1546        # Make sure we have an info object
1547        if isinstance(name, ZipInfo):
1548            # 'name' is already an info object
1549            zinfo = name
1550        elif mode == 'w':
1551            zinfo = ZipInfo(name)
1552            zinfo.compress_type = self.compression
1553            zinfo._compresslevel = self.compresslevel
1554        else:
1555            # Get info object for name
1556            zinfo = self.getinfo(name)
1557
1558        if mode == 'w':
1559            return self._open_to_write(zinfo, force_zip64=force_zip64)
1560
1561        if self._writing:
1562            raise ValueError("Can't read from the ZIP file while there "
1563                    "is an open writing handle on it. "
1564                    "Close the writing handle before trying to read.")
1565
1566        # Open for reading:
1567        self._fileRefCnt += 1
1568        zef_file = _SharedFile(self.fp, zinfo.header_offset,
1569                               self._fpclose, self._lock, lambda: self._writing)
1570        try:
1571            # Skip the file header:
1572            fheader = zef_file.read(sizeFileHeader)
1573            if len(fheader) != sizeFileHeader:
1574                raise BadZipFile("Truncated file header")
1575            fheader = struct.unpack(structFileHeader, fheader)
1576            if fheader[_FH_SIGNATURE] != stringFileHeader:
1577                raise BadZipFile("Bad magic number for file header")
1578
1579            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
1580            if fheader[_FH_EXTRA_FIELD_LENGTH]:
1581                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
1582
1583            if zinfo.flag_bits & _MASK_COMPRESSED_PATCH:
1584                # Zip 2.7: compressed patched data
1585                raise NotImplementedError("compressed patched data (flag bit 5)")
1586
1587            if zinfo.flag_bits & _MASK_STRONG_ENCRYPTION:
1588                # strong encryption
1589                raise NotImplementedError("strong encryption (flag bit 6)")
1590
1591            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & _MASK_UTF_FILENAME:
1592                # UTF-8 filename
1593                fname_str = fname.decode("utf-8")
1594            else:
1595                fname_str = fname.decode(self.metadata_encoding or "cp437")
1596
1597            if fname_str != zinfo.orig_filename:
1598                raise BadZipFile(
1599                    'File name in directory %r and header %r differ.'
1600                    % (zinfo.orig_filename, fname))
1601
1602            if (zinfo._end_offset is not None and
1603                zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
1604                raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)")
1605
1606            # check for encrypted flag & handle password
1607            is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED
1608            if is_encrypted:
1609                if not pwd:
1610                    pwd = self.pwd
1611                if pwd and not isinstance(pwd, bytes):
1612                    raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1613                if not pwd:
1614                    raise RuntimeError("File %r is encrypted, password "
1615                                       "required for extraction" % name)
1616            else:
1617                pwd = None
1618
1619            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
1620        except:
1621            zef_file.close()
1622            raise
1623
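    # Example (minimal sketch, hypothetical names): open(..., 'w') returns a
    # writable file-like object for a new member; passing a ZipInfo with
    # file_size set, or force_zip64=True, matters only when the data may
    # exceed 2 GiB.
    #
    #     with ZipFile('out.zip', 'w') as zf:
    #         with zf.open('logs/run.txt', 'w') as dest:
    #             dest.write(b'first line\n')
    #             dest.write(b'second line\n')
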
1624    def _open_to_write(self, zinfo, force_zip64=False):
1625        if force_zip64 and not self._allowZip64:
1626            raise ValueError(
1627                "force_zip64 is True, but allowZip64 was False when opening "
1628                "the ZIP file."
1629            )
1630        if self._writing:
1631            raise ValueError("Can't write to the ZIP file while there is "
1632                             "another write handle open on it. "
1633                             "Close the first handle before opening another.")
1634
1635        # Size and CRC are overwritten with correct data after processing the file
1636        zinfo.compress_size = 0
1637        zinfo.CRC = 0
1638
1639        zinfo.flag_bits = 0x00
1640        if zinfo.compress_type == ZIP_LZMA:
1641            # Compressed data includes an end-of-stream (EOS) marker
1642            zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1
1643        if not self._seekable:
1644            zinfo.flag_bits |= _MASK_USE_DATA_DESCRIPTOR
1645
1646        if not zinfo.external_attr:
1647            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------
1648
1649        # Compressed size can be larger than uncompressed size
1650        zip64 = force_zip64 or (zinfo.file_size * 1.05 > ZIP64_LIMIT)
1651        if not self._allowZip64 and zip64:
1652            raise LargeZipFile("Filesize would require ZIP64 extensions")
1653
1654        if self._seekable:
1655            self.fp.seek(self.start_dir)
1656        zinfo.header_offset = self.fp.tell()
1657
1658        self._writecheck(zinfo)
1659        self._didModify = True
1660
1661        self.fp.write(zinfo.FileHeader(zip64))
1662
1663        self._writing = True
1664        return _ZipWriteFile(self, zinfo, zip64)
1665
1666    def extract(self, member, path=None, pwd=None):
1667        """Extract a member from the archive to the current working directory,
1668           using its full name. Its file information is extracted as accurately
1669           as possible. `member' may be a filename or a ZipInfo object. You can
1670           specify a different directory using `path'.
1671        """
1672        if path is None:
1673            path = os.getcwd()
1674        else:
1675            path = os.fspath(path)
1676
1677        return self._extract_member(member, path, pwd)
1678
1679    def extractall(self, path=None, members=None, pwd=None):
1680        """Extract all members from the archive to the current working
1681           directory. `path' specifies a different directory to extract to.
1682           `members' is optional and must be a subset of the list returned
1683           by namelist().
1684        """
1685        if members is None:
1686            members = self.namelist()
1687
1688        if path is None:
1689            path = os.getcwd()
1690        else:
1691            path = os.fspath(path)
1692
1693        for zipinfo in members:
1694            self._extract_member(zipinfo, path, pwd)
1695
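    # Example (minimal sketch, hypothetical names): extract only selected
    # members into a target directory; member names are sanitized by
    # _extract_member() before anything touches the filesystem.
    #
    #     with ZipFile('bundle.zip') as zf:
    #         wanted = [n for n in zf.namelist() if n.endswith('.txt')]
    #         zf.extractall('unpacked', members=wanted)
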
1696    @classmethod
1697    def _sanitize_windows_name(cls, arcname, pathsep):
1698        """Replace bad characters and remove trailing dots from parts."""
1699        table = cls._windows_illegal_name_trans_table
1700        if not table:
1701            illegal = ':<>|"?*'
1702            table = str.maketrans(illegal, '_' * len(illegal))
1703            cls._windows_illegal_name_trans_table = table
1704        arcname = arcname.translate(table)
1705        # remove trailing dots
1706        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1707        # rejoin, removing empty parts.
1708        arcname = pathsep.join(x for x in arcname if x)
1709        return arcname
1710
1711    def _extract_member(self, member, targetpath, pwd):
1712        """Extract the ZipInfo object 'member' to a physical
1713           file on the path targetpath.
1714        """
1715        if not isinstance(member, ZipInfo):
1716            member = self.getinfo(member)
1717
1718        # build the destination pathname, replacing
1719        # forward slashes to platform specific separators.
1720        arcname = member.filename.replace('/', os.path.sep)
1721
1722        if os.path.altsep:
1723            arcname = arcname.replace(os.path.altsep, os.path.sep)
1724        # interpret absolute pathname as relative, remove drive letter or
1725        # UNC path, redundant separators, "." and ".." components.
1726        arcname = os.path.splitdrive(arcname)[1]
1727        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
1728        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1729                                   if x not in invalid_path_parts)
1730        if os.path.sep == '\\':
1731            # filter illegal characters on Windows
1732            arcname = self._sanitize_windows_name(arcname, os.path.sep)
1733
1734        targetpath = os.path.join(targetpath, arcname)
1735        targetpath = os.path.normpath(targetpath)
1736
1737        # Create all upper directories if necessary.
1738        upperdirs = os.path.dirname(targetpath)
1739        if upperdirs and not os.path.exists(upperdirs):
1740            os.makedirs(upperdirs)
1741
1742        if member.is_dir():
1743            if not os.path.isdir(targetpath):
1744                os.mkdir(targetpath)
1745            return targetpath
1746
1747        with self.open(member, pwd=pwd) as source, \
1748             open(targetpath, "wb") as target:
1749            shutil.copyfileobj(source, target)
1750
1751        return targetpath
1752
1753    def _writecheck(self, zinfo):
1754        """Check for errors before writing a file to the archive."""
1755        if zinfo.filename in self.NameToInfo:
1756            import warnings
1757            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1758        if self.mode not in ('w', 'x', 'a'):
1759            raise ValueError("write() requires mode 'w', 'x', or 'a'")
1760        if not self.fp:
1761            raise ValueError(
1762                "Attempt to write ZIP archive that was already closed")
1763        _check_compression(zinfo.compress_type)
1764        if not self._allowZip64:
1765            requires_zip64 = None
1766            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1767                requires_zip64 = "Files count"
1768            elif zinfo.file_size > ZIP64_LIMIT:
1769                requires_zip64 = "Filesize"
1770            elif zinfo.header_offset > ZIP64_LIMIT:
1771                requires_zip64 = "Zipfile size"
1772            if requires_zip64:
1773                raise LargeZipFile(requires_zip64 +
1774                                   " would require ZIP64 extensions")
1775
1776    def write(self, filename, arcname=None,
1777              compress_type=None, compresslevel=None):
1778        """Put the bytes from filename into the archive under the name
1779        arcname."""
1780        if not self.fp:
1781            raise ValueError(
1782                "Attempt to write to ZIP archive that was already closed")
1783        if self._writing:
1784            raise ValueError(
1785                "Can't write to ZIP archive while an open writing handle exists"
1786            )
1787
1788        zinfo = ZipInfo.from_file(filename, arcname,
1789                                  strict_timestamps=self._strict_timestamps)
1790
1791        if zinfo.is_dir():
1792            zinfo.compress_size = 0
1793            zinfo.CRC = 0
1794            self.mkdir(zinfo)
1795        else:
1796            if compress_type is not None:
1797                zinfo.compress_type = compress_type
1798            else:
1799                zinfo.compress_type = self.compression
1800
1801            if compresslevel is not None:
1802                zinfo._compresslevel = compresslevel
1803            else:
1804                zinfo._compresslevel = self.compresslevel
1805
1806            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1807                shutil.copyfileobj(src, dest, 1024*8)
1808
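    # Example (minimal sketch, hypothetical names; ZIP_DEFLATED requires zlib):
    # store an on-disk file under a different archive name and override the
    # per-member compression settings.
    #
    #     with ZipFile('out.zip', 'w') as zf:
    #         zf.write('build/report.txt', arcname='report.txt',
    #                  compress_type=ZIP_DEFLATED, compresslevel=9)
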
1809    def writestr(self, zinfo_or_arcname, data,
1810                 compress_type=None, compresslevel=None):
1811        """Write a file into the archive.  The contents is 'data', which
1812        may be either a 'str' or a 'bytes' instance; if it is a 'str',
1813        it is encoded as UTF-8 first.
1814        'zinfo_or_arcname' is either a ZipInfo instance or
1815        the name of the file in the archive."""
1816        if isinstance(data, str):
1817            data = data.encode("utf-8")
1818        if not isinstance(zinfo_or_arcname, ZipInfo):
1819            zinfo = ZipInfo(filename=zinfo_or_arcname,
1820                            date_time=time.localtime(time.time())[:6])
1821            zinfo.compress_type = self.compression
1822            zinfo._compresslevel = self.compresslevel
1823            if zinfo.filename[-1] == '/':
1824                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
1825                zinfo.external_attr |= 0x10           # MS-DOS directory flag
1826            else:
1827                zinfo.external_attr = 0o600 << 16     # ?rw-------
1828        else:
1829            zinfo = zinfo_or_arcname
1830
1831        if not self.fp:
1832            raise ValueError(
1833                "Attempt to write to ZIP archive that was already closed")
1834        if self._writing:
1835            raise ValueError(
1836                "Can't write to ZIP archive while an open writing handle exists."
1837            )
1838
1839        if compress_type is not None:
1840            zinfo.compress_type = compress_type
1841
1842        if compresslevel is not None:
1843            zinfo._compresslevel = compresslevel
1844
1845        zinfo.file_size = len(data)            # Uncompressed size
1846        with self._lock:
1847            with self.open(zinfo, mode='w') as dest:
1848                dest.write(data)
1849
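    # Example (minimal sketch, hypothetical names): writestr() accepts either
    # str (encoded as UTF-8) or bytes, so in-memory content never needs a
    # temporary file.
    #
    #     with ZipFile('out.zip', 'w') as zf:
    #         zf.writestr('readme.txt', 'generated on the fly')
    #         zf.writestr('data.bin', b'\x00\x01\x02')
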
1850    def mkdir(self, zinfo_or_directory_name, mode=511):
1851        """Creates a directory inside the zip archive."""
1852        if isinstance(zinfo_or_directory_name, ZipInfo):
1853            zinfo = zinfo_or_directory_name
1854            if not zinfo.is_dir():
1855                raise ValueError("The given ZipInfo does not describe a directory")
1856        elif isinstance(zinfo_or_directory_name, str):
1857            directory_name = zinfo_or_directory_name
1858            if not directory_name.endswith("/"):
1859                directory_name += "/"
1860            zinfo = ZipInfo(directory_name)
1861            zinfo.compress_size = 0
1862            zinfo.CRC = 0
1863            zinfo.external_attr = ((0o40000 | mode) & 0xFFFF) << 16
1864            zinfo.file_size = 0
1865            zinfo.external_attr |= 0x10
1866        else:
1867            raise TypeError("Expected type str or ZipInfo")
1868
1869        with self._lock:
1870            if self._seekable:
1871                self.fp.seek(self.start_dir)
1872            zinfo.header_offset = self.fp.tell()  # Start of header bytes
1873            if zinfo.compress_type == ZIP_LZMA:
1874                # Compressed data includes an end-of-stream (EOS) marker
1875                zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1
1876
1877            self._writecheck(zinfo)
1878            self._didModify = True
1879
1880            self.filelist.append(zinfo)
1881            self.NameToInfo[zinfo.filename] = zinfo
1882            self.fp.write(zinfo.FileHeader(False))
1883            self.start_dir = self.fp.tell()
1884
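    # Example (minimal sketch, hypothetical names): add an explicit directory
    # entry; mode defaults to 511 (0o777) and is stored in external_attr.
    #
    #     with ZipFile('out.zip', 'w') as zf:
    #         zf.mkdir('assets', mode=0o755)
    #         zf.writestr('assets/logo.svg', '<svg/>')
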
1885    def __del__(self):
1886        """Call the "close()" method in case the user forgot."""
1887        self.close()
1888
1889    def close(self):
1890        """Close the file, and for mode 'w', 'x' and 'a' write the ending
1891        records."""
1892        if self.fp is None:
1893            return
1894
1895        if self._writing:
1896            raise ValueError("Can't close the ZIP file while there is "
1897                             "an open writing handle on it. "
1898                             "Close the writing handle before closing the zip.")
1899
1900        try:
1901            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
1902                with self._lock:
1903                    if self._seekable:
1904                        self.fp.seek(self.start_dir)
1905                    self._write_end_record()
1906        finally:
1907            fp = self.fp
1908            self.fp = None
1909            self._fpclose(fp)
1910
1911    def _write_end_record(self):
1912        for zinfo in self.filelist:         # write central directory
1913            dt = zinfo.date_time
1914            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1915            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1916            extra = []
1917            if zinfo.file_size > ZIP64_LIMIT \
1918               or zinfo.compress_size > ZIP64_LIMIT:
1919                extra.append(zinfo.file_size)
1920                extra.append(zinfo.compress_size)
1921                file_size = 0xffffffff
1922                compress_size = 0xffffffff
1923            else:
1924                file_size = zinfo.file_size
1925                compress_size = zinfo.compress_size
1926
1927            if zinfo.header_offset > ZIP64_LIMIT:
1928                extra.append(zinfo.header_offset)
1929                header_offset = 0xffffffff
1930            else:
1931                header_offset = zinfo.header_offset
1932
1933            extra_data = zinfo.extra
1934            min_version = 0
1935            if extra:
1936                # Append a ZIP64 extended-information field (header ID 1) to the extras
1937                extra_data = _strip_extra(extra_data, (1,))
1938                extra_data = struct.pack(
1939                    '<HH' + 'Q'*len(extra),
1940                    1, 8*len(extra), *extra) + extra_data
1941
1942                min_version = ZIP64_VERSION
1943
1944            if zinfo.compress_type == ZIP_BZIP2:
1945                min_version = max(BZIP2_VERSION, min_version)
1946            elif zinfo.compress_type == ZIP_LZMA:
1947                min_version = max(LZMA_VERSION, min_version)
1948
1949            extract_version = max(min_version, zinfo.extract_version)
1950            create_version = max(min_version, zinfo.create_version)
1951            filename, flag_bits = zinfo._encodeFilenameFlags()
1952            centdir = struct.pack(structCentralDir,
1953                                  stringCentralDir, create_version,
1954                                  zinfo.create_system, extract_version, zinfo.reserved,
1955                                  flag_bits, zinfo.compress_type, dostime, dosdate,
1956                                  zinfo.CRC, compress_size, file_size,
1957                                  len(filename), len(extra_data), len(zinfo.comment),
1958                                  0, zinfo.internal_attr, zinfo.external_attr,
1959                                  header_offset)
1960            self.fp.write(centdir)
1961            self.fp.write(filename)
1962            self.fp.write(extra_data)
1963            self.fp.write(zinfo.comment)
1964
1965        pos2 = self.fp.tell()
1966        # Write end-of-zip-archive record
1967        centDirCount = len(self.filelist)
1968        centDirSize = pos2 - self.start_dir
1969        centDirOffset = self.start_dir
1970        requires_zip64 = None
1971        if centDirCount > ZIP_FILECOUNT_LIMIT:
1972            requires_zip64 = "Files count"
1973        elif centDirOffset > ZIP64_LIMIT:
1974            requires_zip64 = "Central directory offset"
1975        elif centDirSize > ZIP64_LIMIT:
1976            requires_zip64 = "Central directory size"
1977        if requires_zip64:
1978            # Need to write the ZIP64 end-of-archive records
1979            if not self._allowZip64:
1980                raise LargeZipFile(requires_zip64 +
1981                                   " would require ZIP64 extensions")
1982            zip64endrec = struct.pack(
1983                structEndArchive64, stringEndArchive64,
1984                44, 45, 45, 0, 0, centDirCount, centDirCount,
1985                centDirSize, centDirOffset)
1986            self.fp.write(zip64endrec)
1987
1988            zip64locrec = struct.pack(
1989                structEndArchive64Locator,
1990                stringEndArchive64Locator, 0, pos2, 1)
1991            self.fp.write(zip64locrec)
1992            centDirCount = min(centDirCount, 0xFFFF)
1993            centDirSize = min(centDirSize, 0xFFFFFFFF)
1994            centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1995
1996        endrec = struct.pack(structEndArchive, stringEndArchive,
1997                             0, 0, centDirCount, centDirCount,
1998                             centDirSize, centDirOffset, len(self._comment))
1999        self.fp.write(endrec)
2000        self.fp.write(self._comment)
2001        if self.mode == "a":
2002            self.fp.truncate()
2003        self.fp.flush()
2004
2005    def _fpclose(self, fp):
2006        assert self._fileRefCnt > 0
2007        self._fileRefCnt -= 1
2008        if not self._fileRefCnt and not self._filePassed:
2009            fp.close()
2010
2011
2012class PyZipFile(ZipFile):
2013    """Class to create ZIP archives with Python library files and packages."""
2014
2015    def __init__(self, file, mode="r", compression=ZIP_STORED,
2016                 allowZip64=True, optimize=-1):
2017        ZipFile.__init__(self, file, mode=mode, compression=compression,
2018                         allowZip64=allowZip64)
2019        self._optimize = optimize
2020
2021    def writepy(self, pathname, basename="", filterfunc=None):
2022        """Add all files from "pathname" to the ZIP archive.
2023
2024        If pathname is a package directory, search it and all of its
2025        package subdirectories recursively for *.py files and add the
2026        corresponding modules to the archive.  If pathname is a plain
2027        directory, add the modules for the *.py files it contains.
2028        Otherwise, pathname must be a single Python *.py file and its
2029        module is added to the archive.  Modules are always added as module.pyc.
2030        This method will compile the module.py into module.pyc if
2031        necessary.
2032        If filterfunc(pathname) is given, it is called for each path that
2033        would be added; if it returns a false value, that file or directory is skipped.
2034        """
2035        pathname = os.fspath(pathname)
2036        if filterfunc and not filterfunc(pathname):
2037            if self.debug:
2038                label = 'path' if os.path.isdir(pathname) else 'file'
2039                print('%s %r skipped by filterfunc' % (label, pathname))
2040            return
2041        dir, name = os.path.split(pathname)
2042        if os.path.isdir(pathname):
2043            initname = os.path.join(pathname, "__init__.py")
2044            if os.path.isfile(initname):
2045                # This is a package directory, add it
2046                if basename:
2047                    basename = "%s/%s" % (basename, name)
2048                else:
2049                    basename = name
2050                if self.debug:
2051                    print("Adding package in", pathname, "as", basename)
2052                fname, arcname = self._get_codename(initname[0:-3], basename)
2053                if self.debug:
2054                    print("Adding", arcname)
2055                self.write(fname, arcname)
2056                dirlist = sorted(os.listdir(pathname))
2057                dirlist.remove("__init__.py")
2058                # Add all *.py files and package subdirectories
2059                for filename in dirlist:
2060                    path = os.path.join(pathname, filename)
2061                    root, ext = os.path.splitext(filename)
2062                    if os.path.isdir(path):
2063                        if os.path.isfile(os.path.join(path, "__init__.py")):
2064                            # This is a package directory, add it
2065                            self.writepy(path, basename,
2066                                         filterfunc=filterfunc)  # Recursive call
2067                    elif ext == ".py":
2068                        if filterfunc and not filterfunc(path):
2069                            if self.debug:
2070                                print('file %r skipped by filterfunc' % path)
2071                            continue
2072                        fname, arcname = self._get_codename(path[0:-3],
2073                                                            basename)
2074                        if self.debug:
2075                            print("Adding", arcname)
2076                        self.write(fname, arcname)
2077            else:
2078                # This is NOT a package directory, add its files at top level
2079                if self.debug:
2080                    print("Adding files from directory", pathname)
2081                for filename in sorted(os.listdir(pathname)):
2082                    path = os.path.join(pathname, filename)
2083                    root, ext = os.path.splitext(filename)
2084                    if ext == ".py":
2085                        if filterfunc and not filterfunc(path):
2086                            if self.debug:
2087                                print('file %r skipped by filterfunc' % path)
2088                            continue
2089                        fname, arcname = self._get_codename(path[0:-3],
2090                                                            basename)
2091                        if self.debug:
2092                            print("Adding", arcname)
2093                        self.write(fname, arcname)
2094        else:
2095            if pathname[-3:] != ".py":
2096                raise RuntimeError(
2097                    'Files added with writepy() must end with ".py"')
2098            fname, arcname = self._get_codename(pathname[0:-3], basename)
2099            if self.debug:
2100                print("Adding file", arcname)
2101            self.write(fname, arcname)
2102
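    # Example (minimal sketch; 'mypkg' is a hypothetical package directory):
    # writepy() compiles sources as needed and stores the .pyc files, so the
    # resulting archive is importable via zipimport.
    #
    #     with PyZipFile('mypkg.zip', 'w', optimize=2) as pzf:
    #         pzf.writepy('mypkg')
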
2103    def _get_codename(self, pathname, basename):
2104        """Return (filename, archivename) for the path.
2105
2106        Given a module name path, return the correct file path and
2107        archive name, compiling if necessary.  For example, given
2108        /python/lib/string, return (/python/lib/string.pyc, string).
2109        """
2110        def _compile(file, optimize=-1):
2111            import py_compile
2112            if self.debug:
2113                print("Compiling", file)
2114            try:
2115                py_compile.compile(file, doraise=True, optimize=optimize)
2116            except py_compile.PyCompileError as err:
2117                print(err.msg)
2118                return False
2119            return True
2120
2121        file_py  = pathname + ".py"
2122        file_pyc = pathname + ".pyc"
2123        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2124        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2125        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
2126        if self._optimize == -1:
2127            # legacy mode: use whatever file is present
2128            if (os.path.isfile(file_pyc) and
2129                  os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2130                # Use .pyc file.
2131                arcname = fname = file_pyc
2132            elif (os.path.isfile(pycache_opt0) and
2133                  os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
2134                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2135                # file name in the archive.
2136                fname = pycache_opt0
2137                arcname = file_pyc
2138            elif (os.path.isfile(pycache_opt1) and
2139                  os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2140                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2141                # file name in the archive.
2142                fname = pycache_opt1
2143                arcname = file_pyc
2144            elif (os.path.isfile(pycache_opt2) and
2145                  os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2146                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2147                # file name in the archive.
2148                fname = pycache_opt2
2149                arcname = file_pyc
2150            else:
2151                # Compile py into PEP 3147 pyc file.
2152                if _compile(file_py):
2153                    if sys.flags.optimize == 0:
2154                        fname = pycache_opt0
2155                    elif sys.flags.optimize == 1:
2156                        fname = pycache_opt1
2157                    else:
2158                        fname = pycache_opt2
2159                    arcname = file_pyc
2160                else:
2161                    fname = arcname = file_py
2162        else:
2163            # new mode: use given optimization level
2164            if self._optimize == 0:
2165                fname = pycache_opt0
2166                arcname = file_pyc
2167            else:
2168                arcname = file_pyc
2169                if self._optimize == 1:
2170                    fname = pycache_opt1
2171                elif self._optimize == 2:
2172                    fname = pycache_opt2
2173                else:
2174                    msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2175                    raise ValueError(msg)
2176            if not (os.path.isfile(fname) and
2177                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2178                if not _compile(file_py, optimize=self._optimize):
2179                    fname = arcname = file_py
2180        archivename = os.path.split(arcname)[1]
2181        if basename:
2182            archivename = "%s/%s" % (basename, archivename)
2183        return (fname, archivename)
2184
2185
2186def _parents(path):
2187    """
2188    Given a path with elements separated by
2189    posixpath.sep, generate all parents of that path.
2190
2191    >>> list(_parents('b/d'))
2192    ['b']
2193    >>> list(_parents('/b/d/'))
2194    ['/b']
2195    >>> list(_parents('b/d/f/'))
2196    ['b/d', 'b']
2197    >>> list(_parents('b'))
2198    []
2199    >>> list(_parents(''))
2200    []
2201    """
2202    return itertools.islice(_ancestry(path), 1, None)
2203
2204
2205def _ancestry(path):
2206    """
2207    Given a path with elements separated by
2208    posixpath.sep, generate all elements of that path
2209
2210    >>> list(_ancestry('b/d'))
2211    ['b/d', 'b']
2212    >>> list(_ancestry('/b/d/'))
2213    ['/b/d', '/b']
2214    >>> list(_ancestry('b/d/f/'))
2215    ['b/d/f', 'b/d', 'b']
2216    >>> list(_ancestry('b'))
2217    ['b']
2218    >>> list(_ancestry(''))
2219    []
2220    """
2221    path = path.rstrip(posixpath.sep)
2222    while path and path != posixpath.sep:
2223        yield path
2224        path, tail = posixpath.split(path)
2225
2226
2227_dedupe = dict.fromkeys
2228"""Deduplicate an iterable in original order"""
2229
2230
2231def _difference(minuend, subtrahend):
2232    """
2233    Return items in minuend not in subtrahend, retaining order
2234    with O(1) lookup.
2235    """
2236    return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2237
2238
2239class SanitizedNames:
2240    """
2241    ZipFile mix-in to ensure names are sanitized.
2242    """
2243
2244    def namelist(self):
2245        return list(map(self._sanitize, super().namelist()))
2246
2247    @staticmethod
2248    def _sanitize(name):
2249        r"""
2250        Ensure a relative path with posix separators and no dot names.
2251        Modeled after
2252        https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
2253        but provides consistent cross-platform behavior.
2254        >>> san = SanitizedNames._sanitize
2255        >>> san('/foo/bar')
2256        'foo/bar'
2257        >>> san('//foo.txt')
2258        'foo.txt'
2259        >>> san('foo/.././bar.txt')
2260        'foo/bar.txt'
2261        >>> san('foo../.bar.txt')
2262        'foo../.bar.txt'
2263        >>> san('\\foo\\bar.txt')
2264        'foo/bar.txt'
2265        >>> san('D:\\foo.txt')
2266        'D/foo.txt'
2267        >>> san('\\\\server\\share\\file.txt')
2268        'server/share/file.txt'
2269        >>> san('\\\\?\\GLOBALROOT\\Volume3')
2270        '?/GLOBALROOT/Volume3'
2271        >>> san('\\\\.\\PhysicalDrive1\\root')
2272        'PhysicalDrive1/root'
2273        Retain any trailing slash.
2274        >>> san('abc/')
2275        'abc/'
2276        Raises a ValueError if the result is empty.
2277        >>> san('../..')
2278        Traceback (most recent call last):
2279        ...
2280        ValueError: Empty filename
2281        """
2282
2283        def allowed(part):
2284            return part and part not in {'..', '.'}
2285
2286        # Strip the colon from a drive letter (the letter itself is kept).
2287        # Don't use ntpath.splitdrive, because that also strips UNC paths.
2288        bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
2289        clean = bare.replace('\\', '/')
2290        parts = clean.split('/')
2291        joined = '/'.join(filter(allowed, parts))
2292        if not joined:
2293            raise ValueError("Empty filename")
2294        return joined + '/' * name.endswith('/')
2295
2296
2297class CompleteDirs(SanitizedNames, ZipFile):
2298    """
2299    A ZipFile subclass that ensures that implied directories
2300    are always included in the namelist.
2301    """
2302
2303    @staticmethod
2304    def _implied_dirs(names):
2305        parents = itertools.chain.from_iterable(map(_parents, names))
2306        as_dirs = (p + posixpath.sep for p in parents)
2307        return _dedupe(_difference(as_dirs, names))
2308
2309    def namelist(self):
2310        names = super(CompleteDirs, self).namelist()
2311        return names + list(self._implied_dirs(names))
2312
2313    def _name_set(self):
2314        return set(self.namelist())
2315
2316    def resolve_dir(self, name):
2317        """
2318        If the name represents a directory, return that name
2319        as a directory (with the trailing slash).
2320        """
2321        names = self._name_set()
2322        dirname = name + '/'
2323        dir_match = name not in names and dirname in names
2324        return dirname if dir_match else name
2325
2326    def getinfo(self, name):
2327        """
2328        Supplement getinfo for implied dirs.
2329        """
2330        try:
2331            return super().getinfo(name)
2332        except KeyError:
2333            if not name.endswith('/') or name not in self._name_set():
2334                raise
2335            return ZipInfo(filename=name)
2336
2337    @classmethod
2338    def make(cls, source):
2339        """
2340        Given a source (filename or zipfile), return an
2341        appropriate CompleteDirs subclass.
2342        """
2343        if isinstance(source, CompleteDirs):
2344            return source
2345
2346        if not isinstance(source, ZipFile):
2347            return cls(source)
2348
2349        # Only allow for FastLookup when supplied zipfile is read-only
2350        if 'r' not in source.mode:
2351            cls = CompleteDirs
2352
2353        source.__class__ = cls
2354        return source
2355
2356
2357class FastLookup(CompleteDirs):
2358    """
2359    ZipFile subclass to ensure implicit
2360    dirs exist and are resolved rapidly.
2361    """
2362
2363    def namelist(self):
2364        with contextlib.suppress(AttributeError):
2365            return self.__names
2366        self.__names = super(FastLookup, self).namelist()
2367        return self.__names
2368
2369    def _name_set(self):
2370        with contextlib.suppress(AttributeError):
2371            return self.__lookup
2372        self.__lookup = super(FastLookup, self)._name_set()
2373        return self.__lookup
2374
2375
2376def _extract_text_encoding(encoding=None, *args, **kwargs):
2377    # stacklevel=3 so that the caller of the caller sees any warning.
2378    return io.text_encoding(encoding, 3), args, kwargs
2379
2380
2381class Path:
2382    """
2383    A pathlib-compatible interface for zip files.
2384
2385    Consider a zip file with this structure::
2386
2387        .
2388        ├── a.txt
2389        └── b
2390            ├── c.txt
2391            └── d
2392                └── e.txt
2393
2394    >>> data = io.BytesIO()
2395    >>> zf = ZipFile(data, 'w')
2396    >>> zf.writestr('a.txt', 'content of a')
2397    >>> zf.writestr('b/c.txt', 'content of c')
2398    >>> zf.writestr('b/d/e.txt', 'content of e')
2399    >>> zf.filename = 'mem/abcde.zip'
2400
2401    Path accepts the zipfile object itself or a filename
2402
2403    >>> root = Path(zf)
2404
2405    From there, several path operations are available.
2406
2407    Directory iteration (including the zip file itself):
2408
2409    >>> a, b = root.iterdir()
2410    >>> a
2411    Path('mem/abcde.zip', 'a.txt')
2412    >>> b
2413    Path('mem/abcde.zip', 'b/')
2414
2415    name property:
2416
2417    >>> b.name
2418    'b'
2419
2420    join with divide operator:
2421
2422    >>> c = b / 'c.txt'
2423    >>> c
2424    Path('mem/abcde.zip', 'b/c.txt')
2425    >>> c.name
2426    'c.txt'
2427
2428    Read text:
2429
2430    >>> c.read_text()
2431    'content of c'
2432
2433    existence:
2434
2435    >>> c.exists()
2436    True
2437    >>> (b / 'missing.txt').exists()
2438    False
2439
2440    Coercion to string:
2441
2442    >>> import os
2443    >>> str(c).replace(os.sep, posixpath.sep)
2444    'mem/abcde.zip/b/c.txt'
2445
2446    At the root, ``name``, ``filename``, and ``parent``
2447    resolve to the zipfile. Note these attributes are not
2448    valid and will raise a ``ValueError`` if the zipfile
2449    has no filename.
2450
2451    >>> root.name
2452    'abcde.zip'
2453    >>> str(root.filename).replace(os.sep, posixpath.sep)
2454    'mem/abcde.zip'
2455    >>> str(root.parent)
2456    'mem'
2457    """
2458
2459    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
2460
2461    def __init__(self, root, at=""):
2462        """
2463        Construct a Path from a ZipFile or filename.
2464
2465        Note: When the source is an existing ZipFile object,
2466        its type (__class__) will be mutated to a
2467        specialized type. If the caller wishes to retain the
2468        original type, the caller should either create a
2469        separate ZipFile object or pass a filename.
2470        """
2471        self.root = FastLookup.make(root)
2472        self.at = at
2473
2474    def open(self, mode='r', *args, pwd=None, **kwargs):
2475        """
2476        Open this entry as text or binary following the semantics
2477        of ``pathlib.Path.open()`` by passing arguments through
2478        to io.TextIOWrapper().
2479        """
2480        if self.is_dir():
2481            raise IsADirectoryError(self)
2482        zip_mode = mode[0]
2483        if not self.exists() and zip_mode == 'r':
2484            raise FileNotFoundError(self)
2485        stream = self.root.open(self.at, zip_mode, pwd=pwd)
2486        if 'b' in mode:
2487            if args or kwargs:
2488                raise ValueError("encoding args invalid for binary operation")
2489            return stream
2490        # Text mode:
2491        encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
2492        return io.TextIOWrapper(stream, encoding, *args, **kwargs)
2493
2494    @property
2495    def name(self):
2496        return pathlib.Path(self.at).name or self.filename.name
2497
2498    @property
2499    def suffix(self):
2500        return pathlib.Path(self.at).suffix or self.filename.suffix
2501
2502    @property
2503    def suffixes(self):
2504        return pathlib.Path(self.at).suffixes or self.filename.suffixes
2505
2506    @property
2507    def stem(self):
2508        return pathlib.Path(self.at).stem or self.filename.stem
2509
2510    @property
2511    def filename(self):
2512        return pathlib.Path(self.root.filename).joinpath(self.at)
2513
2514    def read_text(self, *args, **kwargs):
2515        encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
2516        with self.open('r', encoding, *args, **kwargs) as strm:
2517            return strm.read()
2518
2519    def read_bytes(self):
2520        with self.open('rb') as strm:
2521            return strm.read()
2522
2523    def _is_child(self, path):
2524        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
2525
2526    def _next(self, at):
2527        return self.__class__(self.root, at)
2528
2529    def is_dir(self):
2530        return not self.at or self.at.endswith("/")
2531
2532    def is_file(self):
2533        return self.exists() and not self.is_dir()
2534
2535    def exists(self):
2536        return self.at in self.root._name_set()
2537
2538    def iterdir(self):
2539        if not self.is_dir():
2540            raise ValueError("Can't listdir a file")
2541        subs = map(self._next, self.root.namelist())
2542        return filter(self._is_child, subs)
2543
2544    def __str__(self):
2545        return posixpath.join(self.root.filename, self.at)
2546
2547    def __repr__(self):
2548        return self.__repr.format(self=self)
2549
2550    def joinpath(self, *other):
2551        next = posixpath.join(self.at, *other)
2552        return self._next(self.root.resolve_dir(next))
2553
2554    __truediv__ = joinpath
2555
2556    @property
2557    def parent(self):
2558        if not self.at:
2559            return self.filename.parent
2560        parent_at = posixpath.dirname(self.at.rstrip('/'))
2561        if parent_at:
2562            parent_at += '/'
2563        return self._next(parent_at)
2564
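# Example (minimal sketch; 'bundle.zip' is hypothetical): recursively walk an
# archive with Path, mirroring pathlib-style traversal.
#
#     def walk(p):
#         for child in p.iterdir():
#             if child.is_dir():
#                 walk(child)
#             else:
#                 print(child.at, len(child.read_bytes()))
#
#     walk(Path('bundle.zip'))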
2565
2566def main(args=None):
2567    import argparse
2568
2569    description = 'A simple command-line interface for zipfile module.'
2570    parser = argparse.ArgumentParser(description=description)
2571    group = parser.add_mutually_exclusive_group(required=True)
2572    group.add_argument('-l', '--list', metavar='<zipfile>',
2573                       help='Show listing of a zipfile')
2574    group.add_argument('-e', '--extract', nargs=2,
2575                       metavar=('<zipfile>', '<output_dir>'),
2576                       help='Extract zipfile into target dir')
2577    group.add_argument('-c', '--create', nargs='+',
2578                       metavar=('<name>', '<file>'),
2579                       help='Create zipfile from sources')
2580    group.add_argument('-t', '--test', metavar='<zipfile>',
2581                       help='Test if a zipfile is valid')
2582    parser.add_argument('--metadata-encoding', metavar='<encoding>',
2583                        help='Specify encoding of member names for -l, -e and -t')
2584    args = parser.parse_args(args)
2585
2586    encoding = args.metadata_encoding
2587
2588    if args.test is not None:
2589        src = args.test
2590        with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
2591            badfile = zf.testzip()
2592        if badfile:
2593            print("The following enclosed file is corrupted: {!r}".format(badfile))
2594        print("Done testing")
2595
2596    elif args.list is not None:
2597        src = args.list
2598        with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
2599            zf.printdir()
2600
2601    elif args.extract is not None:
2602        src, curdir = args.extract
2603        with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
2604            zf.extractall(curdir)
2605
2606    elif args.create is not None:
2607        if encoding:
2608            print("Non-conforming encodings not supported with -c.",
2609                  file=sys.stderr)
2610            sys.exit(1)
2611
2612        zip_name = args.create.pop(0)
2613        files = args.create
2614
2615        def addToZip(zf, path, zippath):
2616            if os.path.isfile(path):
2617                zf.write(path, zippath, ZIP_DEFLATED)
2618            elif os.path.isdir(path):
2619                if zippath:
2620                    zf.write(path, zippath)
2621                for nm in sorted(os.listdir(path)):
2622                    addToZip(zf,
2623                             os.path.join(path, nm), os.path.join(zippath, nm))
2624            # else: ignore
2625
2626        with ZipFile(zip_name, 'w') as zf:
2627            for path in files:
2628                zippath = os.path.basename(path)
2629                if not zippath:
2630                    zippath = os.path.basename(os.path.dirname(path))
2631                if zippath in ('', os.curdir, os.pardir):
2632                    zippath = ''
2633                addToZip(zf, path, zippath)
2634
2635
2636if __name__ == "__main__":
2637    main()
2638