#!/usr/bin/env python3
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
"""Read from and write to tar format archives.
"""

version = "0.9.0"
__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."

#---------
# Imports
#---------
from builtins import open as bltn_open
import sys
import os
import io
import shutil
import stat
import time
import struct
import copy
import re
import warnings

try:
    import pwd
except ImportError:
    pwd = None
try:
    import grp
except ImportError:
    grp = None

# Exceptions that signal "symlinks cannot be created here":
#  * os.symlink on Windows prior to 6.0 raises NotImplementedError
#    (or is missing altogether -> AttributeError);
#  * OSError (winerror=1314) will be raised if the caller does not hold the
#    SeCreateSymbolicLinkPrivilege privilege.
# OSError is a builtin, so the tuple is built in one step.
symlink_exception = (AttributeError, NotImplementedError, OSError)

# from tarfile import *
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
           "CompressionError", "StreamError", "ExtractError", "HeaderError",
           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
           "DEFAULT_FORMAT", "open"]


#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = PAX_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields from a pax header that are affected by hdrcharset.
1357db96d56Sopenharmony_ciPAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"} 1367db96d56Sopenharmony_ci 1377db96d56Sopenharmony_ci# Fields in a pax header that are numbers, all other fields 1387db96d56Sopenharmony_ci# are treated as strings. 1397db96d56Sopenharmony_ciPAX_NUMBER_FIELDS = { 1407db96d56Sopenharmony_ci "atime": float, 1417db96d56Sopenharmony_ci "ctime": float, 1427db96d56Sopenharmony_ci "mtime": float, 1437db96d56Sopenharmony_ci "uid": int, 1447db96d56Sopenharmony_ci "gid": int, 1457db96d56Sopenharmony_ci "size": int 1467db96d56Sopenharmony_ci} 1477db96d56Sopenharmony_ci 1487db96d56Sopenharmony_ci#--------------------------------------------------------- 1497db96d56Sopenharmony_ci# initialization 1507db96d56Sopenharmony_ci#--------------------------------------------------------- 1517db96d56Sopenharmony_ciif os.name == "nt": 1527db96d56Sopenharmony_ci ENCODING = "utf-8" 1537db96d56Sopenharmony_cielse: 1547db96d56Sopenharmony_ci ENCODING = sys.getfilesystemencoding() 1557db96d56Sopenharmony_ci 1567db96d56Sopenharmony_ci#--------------------------------------------------------- 1577db96d56Sopenharmony_ci# Some useful functions 1587db96d56Sopenharmony_ci#--------------------------------------------------------- 1597db96d56Sopenharmony_ci 1607db96d56Sopenharmony_cidef stn(s, length, encoding, errors): 1617db96d56Sopenharmony_ci """Convert a string to a null-terminated bytes object. 1627db96d56Sopenharmony_ci """ 1637db96d56Sopenharmony_ci if s is None: 1647db96d56Sopenharmony_ci raise ValueError("metadata cannot contain None") 1657db96d56Sopenharmony_ci s = s.encode(encoding, errors) 1667db96d56Sopenharmony_ci return s[:length] + (length - len(s)) * NUL 1677db96d56Sopenharmony_ci 1687db96d56Sopenharmony_cidef nts(s, encoding, errors): 1697db96d56Sopenharmony_ci """Convert a null-terminated bytes object to a string. 
1707db96d56Sopenharmony_ci """ 1717db96d56Sopenharmony_ci p = s.find(b"\0") 1727db96d56Sopenharmony_ci if p != -1: 1737db96d56Sopenharmony_ci s = s[:p] 1747db96d56Sopenharmony_ci return s.decode(encoding, errors) 1757db96d56Sopenharmony_ci 1767db96d56Sopenharmony_cidef nti(s): 1777db96d56Sopenharmony_ci """Convert a number field to a python number. 1787db96d56Sopenharmony_ci """ 1797db96d56Sopenharmony_ci # There are two possible encodings for a number field, see 1807db96d56Sopenharmony_ci # itn() below. 1817db96d56Sopenharmony_ci if s[0] in (0o200, 0o377): 1827db96d56Sopenharmony_ci n = 0 1837db96d56Sopenharmony_ci for i in range(len(s) - 1): 1847db96d56Sopenharmony_ci n <<= 8 1857db96d56Sopenharmony_ci n += s[i + 1] 1867db96d56Sopenharmony_ci if s[0] == 0o377: 1877db96d56Sopenharmony_ci n = -(256 ** (len(s) - 1) - n) 1887db96d56Sopenharmony_ci else: 1897db96d56Sopenharmony_ci try: 1907db96d56Sopenharmony_ci s = nts(s, "ascii", "strict") 1917db96d56Sopenharmony_ci n = int(s.strip() or "0", 8) 1927db96d56Sopenharmony_ci except ValueError: 1937db96d56Sopenharmony_ci raise InvalidHeaderError("invalid header") 1947db96d56Sopenharmony_ci return n 1957db96d56Sopenharmony_ci 1967db96d56Sopenharmony_cidef itn(n, digits=8, format=DEFAULT_FORMAT): 1977db96d56Sopenharmony_ci """Convert a python number to a number field. 1987db96d56Sopenharmony_ci """ 1997db96d56Sopenharmony_ci # POSIX 1003.1-1988 requires numbers to be encoded as a string of 2007db96d56Sopenharmony_ci # octal digits followed by a null-byte, this allows values up to 2017db96d56Sopenharmony_ci # (8**(digits-1))-1. GNU tar allows storing numbers greater than 2027db96d56Sopenharmony_ci # that if necessary. A leading 0o200 or 0o377 byte indicate this 2037db96d56Sopenharmony_ci # particular encoding, the following digits-1 bytes are a big-endian 2047db96d56Sopenharmony_ci # base-256 representation. This allows values up to (256**(digits-1))-1. 
2057db96d56Sopenharmony_ci # A 0o200 byte indicates a positive number, a 0o377 byte a negative 2067db96d56Sopenharmony_ci # number. 2077db96d56Sopenharmony_ci original_n = n 2087db96d56Sopenharmony_ci n = int(n) 2097db96d56Sopenharmony_ci if 0 <= n < 8 ** (digits - 1): 2107db96d56Sopenharmony_ci s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL 2117db96d56Sopenharmony_ci elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1): 2127db96d56Sopenharmony_ci if n >= 0: 2137db96d56Sopenharmony_ci s = bytearray([0o200]) 2147db96d56Sopenharmony_ci else: 2157db96d56Sopenharmony_ci s = bytearray([0o377]) 2167db96d56Sopenharmony_ci n = 256 ** digits + n 2177db96d56Sopenharmony_ci 2187db96d56Sopenharmony_ci for i in range(digits - 1): 2197db96d56Sopenharmony_ci s.insert(1, n & 0o377) 2207db96d56Sopenharmony_ci n >>= 8 2217db96d56Sopenharmony_ci else: 2227db96d56Sopenharmony_ci raise ValueError("overflow in number field") 2237db96d56Sopenharmony_ci 2247db96d56Sopenharmony_ci return s 2257db96d56Sopenharmony_ci 2267db96d56Sopenharmony_cidef calc_chksums(buf): 2277db96d56Sopenharmony_ci """Calculate the checksum for a member's header by summing up all 2287db96d56Sopenharmony_ci characters except for the chksum field which is treated as if 2297db96d56Sopenharmony_ci it was filled with spaces. According to the GNU tar sources, 2307db96d56Sopenharmony_ci some tars (Sun and NeXT) calculate chksum with signed char, 2317db96d56Sopenharmony_ci which will be different if there are chars in the buffer with 2327db96d56Sopenharmony_ci the high bit set. So we calculate two checksums, unsigned and 2337db96d56Sopenharmony_ci signed. 
2347db96d56Sopenharmony_ci """ 2357db96d56Sopenharmony_ci unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf)) 2367db96d56Sopenharmony_ci signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf)) 2377db96d56Sopenharmony_ci return unsigned_chksum, signed_chksum 2387db96d56Sopenharmony_ci 2397db96d56Sopenharmony_cidef copyfileobj(src, dst, length=None, exception=OSError, bufsize=None): 2407db96d56Sopenharmony_ci """Copy length bytes from fileobj src to fileobj dst. 2417db96d56Sopenharmony_ci If length is None, copy the entire content. 2427db96d56Sopenharmony_ci """ 2437db96d56Sopenharmony_ci bufsize = bufsize or 16 * 1024 2447db96d56Sopenharmony_ci if length == 0: 2457db96d56Sopenharmony_ci return 2467db96d56Sopenharmony_ci if length is None: 2477db96d56Sopenharmony_ci shutil.copyfileobj(src, dst, bufsize) 2487db96d56Sopenharmony_ci return 2497db96d56Sopenharmony_ci 2507db96d56Sopenharmony_ci blocks, remainder = divmod(length, bufsize) 2517db96d56Sopenharmony_ci for b in range(blocks): 2527db96d56Sopenharmony_ci buf = src.read(bufsize) 2537db96d56Sopenharmony_ci if len(buf) < bufsize: 2547db96d56Sopenharmony_ci raise exception("unexpected end of data") 2557db96d56Sopenharmony_ci dst.write(buf) 2567db96d56Sopenharmony_ci 2577db96d56Sopenharmony_ci if remainder != 0: 2587db96d56Sopenharmony_ci buf = src.read(remainder) 2597db96d56Sopenharmony_ci if len(buf) < remainder: 2607db96d56Sopenharmony_ci raise exception("unexpected end of data") 2617db96d56Sopenharmony_ci dst.write(buf) 2627db96d56Sopenharmony_ci return 2637db96d56Sopenharmony_ci 2647db96d56Sopenharmony_cidef _safe_print(s): 2657db96d56Sopenharmony_ci encoding = getattr(sys.stdout, 'encoding', None) 2667db96d56Sopenharmony_ci if encoding is not None: 2677db96d56Sopenharmony_ci s = s.encode(encoding, 'backslashreplace').decode(encoding) 2687db96d56Sopenharmony_ci print(s, end=' ') 2697db96d56Sopenharmony_ci 2707db96d56Sopenharmony_ci 2717db96d56Sopenharmony_ciclass TarError(Exception): 
2727db96d56Sopenharmony_ci """Base exception.""" 2737db96d56Sopenharmony_ci pass 2747db96d56Sopenharmony_ciclass ExtractError(TarError): 2757db96d56Sopenharmony_ci """General exception for extract errors.""" 2767db96d56Sopenharmony_ci pass 2777db96d56Sopenharmony_ciclass ReadError(TarError): 2787db96d56Sopenharmony_ci """Exception for unreadable tar archives.""" 2797db96d56Sopenharmony_ci pass 2807db96d56Sopenharmony_ciclass CompressionError(TarError): 2817db96d56Sopenharmony_ci """Exception for unavailable compression methods.""" 2827db96d56Sopenharmony_ci pass 2837db96d56Sopenharmony_ciclass StreamError(TarError): 2847db96d56Sopenharmony_ci """Exception for unsupported operations on stream-like TarFiles.""" 2857db96d56Sopenharmony_ci pass 2867db96d56Sopenharmony_ciclass HeaderError(TarError): 2877db96d56Sopenharmony_ci """Base exception for header errors.""" 2887db96d56Sopenharmony_ci pass 2897db96d56Sopenharmony_ciclass EmptyHeaderError(HeaderError): 2907db96d56Sopenharmony_ci """Exception for empty headers.""" 2917db96d56Sopenharmony_ci pass 2927db96d56Sopenharmony_ciclass TruncatedHeaderError(HeaderError): 2937db96d56Sopenharmony_ci """Exception for truncated headers.""" 2947db96d56Sopenharmony_ci pass 2957db96d56Sopenharmony_ciclass EOFHeaderError(HeaderError): 2967db96d56Sopenharmony_ci """Exception for end of file headers.""" 2977db96d56Sopenharmony_ci pass 2987db96d56Sopenharmony_ciclass InvalidHeaderError(HeaderError): 2997db96d56Sopenharmony_ci """Exception for invalid headers.""" 3007db96d56Sopenharmony_ci pass 3017db96d56Sopenharmony_ciclass SubsequentHeaderError(HeaderError): 3027db96d56Sopenharmony_ci """Exception for missing and invalid extended headers.""" 3037db96d56Sopenharmony_ci pass 3047db96d56Sopenharmony_ci 3057db96d56Sopenharmony_ci#--------------------------- 3067db96d56Sopenharmony_ci# internal stream interface 3077db96d56Sopenharmony_ci#--------------------------- 3087db96d56Sopenharmony_ciclass _LowLevelFile: 3097db96d56Sopenharmony_ci 
"""Low-level file object. Supports reading and writing. 3107db96d56Sopenharmony_ci It is used instead of a regular file object for streaming 3117db96d56Sopenharmony_ci access. 3127db96d56Sopenharmony_ci """ 3137db96d56Sopenharmony_ci 3147db96d56Sopenharmony_ci def __init__(self, name, mode): 3157db96d56Sopenharmony_ci mode = { 3167db96d56Sopenharmony_ci "r": os.O_RDONLY, 3177db96d56Sopenharmony_ci "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 3187db96d56Sopenharmony_ci }[mode] 3197db96d56Sopenharmony_ci if hasattr(os, "O_BINARY"): 3207db96d56Sopenharmony_ci mode |= os.O_BINARY 3217db96d56Sopenharmony_ci self.fd = os.open(name, mode, 0o666) 3227db96d56Sopenharmony_ci 3237db96d56Sopenharmony_ci def close(self): 3247db96d56Sopenharmony_ci os.close(self.fd) 3257db96d56Sopenharmony_ci 3267db96d56Sopenharmony_ci def read(self, size): 3277db96d56Sopenharmony_ci return os.read(self.fd, size) 3287db96d56Sopenharmony_ci 3297db96d56Sopenharmony_ci def write(self, s): 3307db96d56Sopenharmony_ci os.write(self.fd, s) 3317db96d56Sopenharmony_ci 3327db96d56Sopenharmony_ciclass _Stream: 3337db96d56Sopenharmony_ci """Class that serves as an adapter between TarFile and 3347db96d56Sopenharmony_ci a stream-like object. The stream-like object only 3357db96d56Sopenharmony_ci needs to have a read() or write() method and is accessed 3367db96d56Sopenharmony_ci blockwise. Use of gzip or bzip2 compression is possible. 3377db96d56Sopenharmony_ci A stream-like object could be for example: sys.stdin, 3387db96d56Sopenharmony_ci sys.stdout, a socket, a tape device etc. 3397db96d56Sopenharmony_ci 3407db96d56Sopenharmony_ci _Stream is intended to be used only internally. 3417db96d56Sopenharmony_ci """ 3427db96d56Sopenharmony_ci 3437db96d56Sopenharmony_ci def __init__(self, name, mode, comptype, fileobj, bufsize): 3447db96d56Sopenharmony_ci """Construct a _Stream object. 
3457db96d56Sopenharmony_ci """ 3467db96d56Sopenharmony_ci self._extfileobj = True 3477db96d56Sopenharmony_ci if fileobj is None: 3487db96d56Sopenharmony_ci fileobj = _LowLevelFile(name, mode) 3497db96d56Sopenharmony_ci self._extfileobj = False 3507db96d56Sopenharmony_ci 3517db96d56Sopenharmony_ci if comptype == '*': 3527db96d56Sopenharmony_ci # Enable transparent compression detection for the 3537db96d56Sopenharmony_ci # stream interface 3547db96d56Sopenharmony_ci fileobj = _StreamProxy(fileobj) 3557db96d56Sopenharmony_ci comptype = fileobj.getcomptype() 3567db96d56Sopenharmony_ci 3577db96d56Sopenharmony_ci self.name = name or "" 3587db96d56Sopenharmony_ci self.mode = mode 3597db96d56Sopenharmony_ci self.comptype = comptype 3607db96d56Sopenharmony_ci self.fileobj = fileobj 3617db96d56Sopenharmony_ci self.bufsize = bufsize 3627db96d56Sopenharmony_ci self.buf = b"" 3637db96d56Sopenharmony_ci self.pos = 0 3647db96d56Sopenharmony_ci self.closed = False 3657db96d56Sopenharmony_ci 3667db96d56Sopenharmony_ci try: 3677db96d56Sopenharmony_ci if comptype == "gz": 3687db96d56Sopenharmony_ci try: 3697db96d56Sopenharmony_ci import zlib 3707db96d56Sopenharmony_ci except ImportError: 3717db96d56Sopenharmony_ci raise CompressionError("zlib module is not available") from None 3727db96d56Sopenharmony_ci self.zlib = zlib 3737db96d56Sopenharmony_ci self.crc = zlib.crc32(b"") 3747db96d56Sopenharmony_ci if mode == "r": 3757db96d56Sopenharmony_ci self._init_read_gz() 3767db96d56Sopenharmony_ci self.exception = zlib.error 3777db96d56Sopenharmony_ci else: 3787db96d56Sopenharmony_ci self._init_write_gz() 3797db96d56Sopenharmony_ci 3807db96d56Sopenharmony_ci elif comptype == "bz2": 3817db96d56Sopenharmony_ci try: 3827db96d56Sopenharmony_ci import bz2 3837db96d56Sopenharmony_ci except ImportError: 3847db96d56Sopenharmony_ci raise CompressionError("bz2 module is not available") from None 3857db96d56Sopenharmony_ci if mode == "r": 3867db96d56Sopenharmony_ci self.dbuf = b"" 
3877db96d56Sopenharmony_ci self.cmp = bz2.BZ2Decompressor() 3887db96d56Sopenharmony_ci self.exception = OSError 3897db96d56Sopenharmony_ci else: 3907db96d56Sopenharmony_ci self.cmp = bz2.BZ2Compressor() 3917db96d56Sopenharmony_ci 3927db96d56Sopenharmony_ci elif comptype == "xz": 3937db96d56Sopenharmony_ci try: 3947db96d56Sopenharmony_ci import lzma 3957db96d56Sopenharmony_ci except ImportError: 3967db96d56Sopenharmony_ci raise CompressionError("lzma module is not available") from None 3977db96d56Sopenharmony_ci if mode == "r": 3987db96d56Sopenharmony_ci self.dbuf = b"" 3997db96d56Sopenharmony_ci self.cmp = lzma.LZMADecompressor() 4007db96d56Sopenharmony_ci self.exception = lzma.LZMAError 4017db96d56Sopenharmony_ci else: 4027db96d56Sopenharmony_ci self.cmp = lzma.LZMACompressor() 4037db96d56Sopenharmony_ci 4047db96d56Sopenharmony_ci elif comptype != "tar": 4057db96d56Sopenharmony_ci raise CompressionError("unknown compression type %r" % comptype) 4067db96d56Sopenharmony_ci 4077db96d56Sopenharmony_ci except: 4087db96d56Sopenharmony_ci if not self._extfileobj: 4097db96d56Sopenharmony_ci self.fileobj.close() 4107db96d56Sopenharmony_ci self.closed = True 4117db96d56Sopenharmony_ci raise 4127db96d56Sopenharmony_ci 4137db96d56Sopenharmony_ci def __del__(self): 4147db96d56Sopenharmony_ci if hasattr(self, "closed") and not self.closed: 4157db96d56Sopenharmony_ci self.close() 4167db96d56Sopenharmony_ci 4177db96d56Sopenharmony_ci def _init_write_gz(self): 4187db96d56Sopenharmony_ci """Initialize for writing with gzip compression. 
4197db96d56Sopenharmony_ci """ 4207db96d56Sopenharmony_ci self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, 4217db96d56Sopenharmony_ci -self.zlib.MAX_WBITS, 4227db96d56Sopenharmony_ci self.zlib.DEF_MEM_LEVEL, 4237db96d56Sopenharmony_ci 0) 4247db96d56Sopenharmony_ci timestamp = struct.pack("<L", int(time.time())) 4257db96d56Sopenharmony_ci self.__write(b"\037\213\010\010" + timestamp + b"\002\377") 4267db96d56Sopenharmony_ci if self.name.endswith(".gz"): 4277db96d56Sopenharmony_ci self.name = self.name[:-3] 4287db96d56Sopenharmony_ci # Honor "directory components removed" from RFC1952 4297db96d56Sopenharmony_ci self.name = os.path.basename(self.name) 4307db96d56Sopenharmony_ci # RFC1952 says we must use ISO-8859-1 for the FNAME field. 4317db96d56Sopenharmony_ci self.__write(self.name.encode("iso-8859-1", "replace") + NUL) 4327db96d56Sopenharmony_ci 4337db96d56Sopenharmony_ci def write(self, s): 4347db96d56Sopenharmony_ci """Write string s to the stream. 4357db96d56Sopenharmony_ci """ 4367db96d56Sopenharmony_ci if self.comptype == "gz": 4377db96d56Sopenharmony_ci self.crc = self.zlib.crc32(s, self.crc) 4387db96d56Sopenharmony_ci self.pos += len(s) 4397db96d56Sopenharmony_ci if self.comptype != "tar": 4407db96d56Sopenharmony_ci s = self.cmp.compress(s) 4417db96d56Sopenharmony_ci self.__write(s) 4427db96d56Sopenharmony_ci 4437db96d56Sopenharmony_ci def __write(self, s): 4447db96d56Sopenharmony_ci """Write string s to the stream if a whole new block 4457db96d56Sopenharmony_ci is ready to be written. 4467db96d56Sopenharmony_ci """ 4477db96d56Sopenharmony_ci self.buf += s 4487db96d56Sopenharmony_ci while len(self.buf) > self.bufsize: 4497db96d56Sopenharmony_ci self.fileobj.write(self.buf[:self.bufsize]) 4507db96d56Sopenharmony_ci self.buf = self.buf[self.bufsize:] 4517db96d56Sopenharmony_ci 4527db96d56Sopenharmony_ci def close(self): 4537db96d56Sopenharmony_ci """Close the _Stream object. No operation should be 4547db96d56Sopenharmony_ci done on it afterwards. 
4557db96d56Sopenharmony_ci """ 4567db96d56Sopenharmony_ci if self.closed: 4577db96d56Sopenharmony_ci return 4587db96d56Sopenharmony_ci 4597db96d56Sopenharmony_ci self.closed = True 4607db96d56Sopenharmony_ci try: 4617db96d56Sopenharmony_ci if self.mode == "w" and self.comptype != "tar": 4627db96d56Sopenharmony_ci self.buf += self.cmp.flush() 4637db96d56Sopenharmony_ci 4647db96d56Sopenharmony_ci if self.mode == "w" and self.buf: 4657db96d56Sopenharmony_ci self.fileobj.write(self.buf) 4667db96d56Sopenharmony_ci self.buf = b"" 4677db96d56Sopenharmony_ci if self.comptype == "gz": 4687db96d56Sopenharmony_ci self.fileobj.write(struct.pack("<L", self.crc)) 4697db96d56Sopenharmony_ci self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF)) 4707db96d56Sopenharmony_ci finally: 4717db96d56Sopenharmony_ci if not self._extfileobj: 4727db96d56Sopenharmony_ci self.fileobj.close() 4737db96d56Sopenharmony_ci 4747db96d56Sopenharmony_ci def _init_read_gz(self): 4757db96d56Sopenharmony_ci """Initialize for reading a gzip compressed fileobj. 
4767db96d56Sopenharmony_ci """ 4777db96d56Sopenharmony_ci self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS) 4787db96d56Sopenharmony_ci self.dbuf = b"" 4797db96d56Sopenharmony_ci 4807db96d56Sopenharmony_ci # taken from gzip.GzipFile with some alterations 4817db96d56Sopenharmony_ci if self.__read(2) != b"\037\213": 4827db96d56Sopenharmony_ci raise ReadError("not a gzip file") 4837db96d56Sopenharmony_ci if self.__read(1) != b"\010": 4847db96d56Sopenharmony_ci raise CompressionError("unsupported compression method") 4857db96d56Sopenharmony_ci 4867db96d56Sopenharmony_ci flag = ord(self.__read(1)) 4877db96d56Sopenharmony_ci self.__read(6) 4887db96d56Sopenharmony_ci 4897db96d56Sopenharmony_ci if flag & 4: 4907db96d56Sopenharmony_ci xlen = ord(self.__read(1)) + 256 * ord(self.__read(1)) 4917db96d56Sopenharmony_ci self.read(xlen) 4927db96d56Sopenharmony_ci if flag & 8: 4937db96d56Sopenharmony_ci while True: 4947db96d56Sopenharmony_ci s = self.__read(1) 4957db96d56Sopenharmony_ci if not s or s == NUL: 4967db96d56Sopenharmony_ci break 4977db96d56Sopenharmony_ci if flag & 16: 4987db96d56Sopenharmony_ci while True: 4997db96d56Sopenharmony_ci s = self.__read(1) 5007db96d56Sopenharmony_ci if not s or s == NUL: 5017db96d56Sopenharmony_ci break 5027db96d56Sopenharmony_ci if flag & 2: 5037db96d56Sopenharmony_ci self.__read(2) 5047db96d56Sopenharmony_ci 5057db96d56Sopenharmony_ci def tell(self): 5067db96d56Sopenharmony_ci """Return the stream's file pointer position. 5077db96d56Sopenharmony_ci """ 5087db96d56Sopenharmony_ci return self.pos 5097db96d56Sopenharmony_ci 5107db96d56Sopenharmony_ci def seek(self, pos=0): 5117db96d56Sopenharmony_ci """Set the stream's file pointer to pos. Negative seeking 5127db96d56Sopenharmony_ci is forbidden. 
5137db96d56Sopenharmony_ci """ 5147db96d56Sopenharmony_ci if pos - self.pos >= 0: 5157db96d56Sopenharmony_ci blocks, remainder = divmod(pos - self.pos, self.bufsize) 5167db96d56Sopenharmony_ci for i in range(blocks): 5177db96d56Sopenharmony_ci self.read(self.bufsize) 5187db96d56Sopenharmony_ci self.read(remainder) 5197db96d56Sopenharmony_ci else: 5207db96d56Sopenharmony_ci raise StreamError("seeking backwards is not allowed") 5217db96d56Sopenharmony_ci return self.pos 5227db96d56Sopenharmony_ci 5237db96d56Sopenharmony_ci def read(self, size): 5247db96d56Sopenharmony_ci """Return the next size number of bytes from the stream.""" 5257db96d56Sopenharmony_ci assert size is not None 5267db96d56Sopenharmony_ci buf = self._read(size) 5277db96d56Sopenharmony_ci self.pos += len(buf) 5287db96d56Sopenharmony_ci return buf 5297db96d56Sopenharmony_ci 5307db96d56Sopenharmony_ci def _read(self, size): 5317db96d56Sopenharmony_ci """Return size bytes from the stream. 5327db96d56Sopenharmony_ci """ 5337db96d56Sopenharmony_ci if self.comptype == "tar": 5347db96d56Sopenharmony_ci return self.__read(size) 5357db96d56Sopenharmony_ci 5367db96d56Sopenharmony_ci c = len(self.dbuf) 5377db96d56Sopenharmony_ci t = [self.dbuf] 5387db96d56Sopenharmony_ci while c < size: 5397db96d56Sopenharmony_ci # Skip underlying buffer to avoid unaligned double buffering. 
5407db96d56Sopenharmony_ci if self.buf: 5417db96d56Sopenharmony_ci buf = self.buf 5427db96d56Sopenharmony_ci self.buf = b"" 5437db96d56Sopenharmony_ci else: 5447db96d56Sopenharmony_ci buf = self.fileobj.read(self.bufsize) 5457db96d56Sopenharmony_ci if not buf: 5467db96d56Sopenharmony_ci break 5477db96d56Sopenharmony_ci try: 5487db96d56Sopenharmony_ci buf = self.cmp.decompress(buf) 5497db96d56Sopenharmony_ci except self.exception as e: 5507db96d56Sopenharmony_ci raise ReadError("invalid compressed data") from e 5517db96d56Sopenharmony_ci t.append(buf) 5527db96d56Sopenharmony_ci c += len(buf) 5537db96d56Sopenharmony_ci t = b"".join(t) 5547db96d56Sopenharmony_ci self.dbuf = t[size:] 5557db96d56Sopenharmony_ci return t[:size] 5567db96d56Sopenharmony_ci 5577db96d56Sopenharmony_ci def __read(self, size): 5587db96d56Sopenharmony_ci """Return size bytes from stream. If internal buffer is empty, 5597db96d56Sopenharmony_ci read another block from the stream. 5607db96d56Sopenharmony_ci """ 5617db96d56Sopenharmony_ci c = len(self.buf) 5627db96d56Sopenharmony_ci t = [self.buf] 5637db96d56Sopenharmony_ci while c < size: 5647db96d56Sopenharmony_ci buf = self.fileobj.read(self.bufsize) 5657db96d56Sopenharmony_ci if not buf: 5667db96d56Sopenharmony_ci break 5677db96d56Sopenharmony_ci t.append(buf) 5687db96d56Sopenharmony_ci c += len(buf) 5697db96d56Sopenharmony_ci t = b"".join(t) 5707db96d56Sopenharmony_ci self.buf = t[size:] 5717db96d56Sopenharmony_ci return t[:size] 5727db96d56Sopenharmony_ci# class _Stream 5737db96d56Sopenharmony_ci 5747db96d56Sopenharmony_ciclass _StreamProxy(object): 5757db96d56Sopenharmony_ci """Small proxy class that enables transparent compression 5767db96d56Sopenharmony_ci detection for the Stream interface (mode 'r|*'). 
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.

       fileobj is the underlying file, offset is the position in
       fileobj where the member's stored data starts and size is the
       logical size of the member. blockinfo is an optional list of
       (offset, size) pairs describing which logical ranges are
       actually stored; ranges not listed are read back as NUL bytes.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            # Not sparse: a single data block covers the whole member.
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks. Each entry is
        # (is_data, logical_start, logical_stop, position_in_fileobj).
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for offset, size in blockinfo:
            if offset > lastpos:
                # Hole between the previous data block and this one.
                self.map.append((False, lastpos, offset, None))
            self.map.append((True, offset, offset + size, realpos))
            realpos += size
            lastpos = offset + size
        if lastpos < self.size:
            # Trailing hole up to the logical end of the member.
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.

        The resulting position is clamped to the range [0, self.size]
        and returned. Raise ValueError for an unknown whence.
        """
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.

        Return at most size bytes starting at the current position. If
        size is None or negative, read up to the end of the member.
        Raise ReadError if the underlying file yields fewer bytes than
        the block map promises.
        """
        remaining = self.size - self.position
        if size is None or size < 0:
            # Follow the usual raw I/O convention: read(-1) means
            # "everything that is left", not "nothing".
            size = remaining
        else:
            size = min(size, remaining)

        # Collect the pieces and join once; repeated bytes += is
        # quadratic when a sparse member has many segments.
        chunks = []
        while size > 0:
            # Advance (cyclically) to the map entry holding self.position.
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                self.fileobj.seek(offset + (self.position - start))
                b = self.fileobj.read(length)
                if len(b) != length:
                    raise ReadError("unexpected end of data")
                chunks.append(b)
            else:
                # Hole in a sparse member: synthesize zero bytes.
                chunks.append(NUL * length)
            size -= length
            self.position += length
        return b"".join(chunks)

    def readinto(self, b):
        """Read into the writable buffer b and return the byte count.
        """
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        self.closed = True
class FilterError(TarError):
    """Base class for the errors raised by the extraction filters."""


class AbsolutePathError(FilterError):
    """A member name is still an absolute path after stripping."""

    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        message = f'member {tarinfo.name!r} has an absolute path'
        super().__init__(message)


class OutsideDestinationError(FilterError):
    """A member would be extracted outside the destination directory."""

    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        message = (f'{tarinfo.name!r} would be extracted to {path!r}, '
                   'which is outside the destination')
        super().__init__(message)


class SpecialFileError(FilterError):
    """A member is neither a regular file, a link nor a directory."""

    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        message = f'{tarinfo.name!r} is a special file'
        super().__init__(message)


class AbsoluteLinkError(FilterError):
    """A link member has an absolute path as its link target."""

    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        message = f'{tarinfo.name!r} is a symlink to an absolute path'
        super().__init__(message)
def _get_filtered_attrs(member, dest_path, for_data=True):
    """Return the attribute changes a filter must apply to member.

    member is the TarInfo being extracted and dest_path the extraction
    destination. The result is a dict suitable for
    member.replace(**result); it is empty if nothing has to change.
    With for_data false only the name, path and permission checks are
    done. With for_data true, special files are rejected, ownership
    information is cleared and link targets are checked as well.

    Raises AbsolutePathError, OutsideDestinationError, SpecialFileError,
    AbsoluteLinkError or LinkOutsideDestinationError if the member is
    rejected.
    """
    new_attrs = {}
    name = member.name
    dest_path = os.path.realpath(dest_path)
    # Strip leading / (tar's directory separator) from filenames.
    # Include os.sep (target OS directory separator) as well.
    if name.startswith(('/', os.sep)):
        name = new_attrs['name'] = member.path.lstrip('/' + os.sep)
    if os.path.isabs(name):
        # Path is absolute even after stripping.
        # For example, 'C:/foo' on Windows.
        raise AbsolutePathError(member)
    # Ensure we stay in the destination
    target_path = os.path.realpath(os.path.join(dest_path, name))
    if os.path.commonpath([target_path, dest_path]) != dest_path:
        raise OutsideDestinationError(member, target_path)
    # Limit permissions (no high bits, and go-w)
    mode = member.mode
    if mode is not None:
        # Strip high bits & group/other write bits
        mode = mode & 0o755
        if for_data:
            # For data, handle permissions & file types
            if member.isreg() or member.islnk():
                if not mode & 0o100:
                    # Clear executable bits if not executable by user
                    mode &= ~0o111
                # Ensure owner can read & write
                mode |= 0o600
            elif member.isdir() or member.issym():
                # Ignore mode for directories & symlinks
                mode = None
            else:
                # Reject special files
                raise SpecialFileError(member)
        if mode != member.mode:
            new_attrs['mode'] = mode
    if for_data:
        # Ignore ownership for 'data'
        if member.uid is not None:
            new_attrs['uid'] = None
        if member.gid is not None:
            new_attrs['gid'] = None
        if member.uname is not None:
            new_attrs['uname'] = None
        if member.gname is not None:
            new_attrs['gname'] = None
        # Check link destination for 'data'
        if member.islnk() or member.issym():
            if os.path.isabs(member.linkname):
                raise AbsoluteLinkError(member)
            if member.issym():
                # A symlink target is interpreted relative to the
                # directory that contains the symlink, not relative to
                # the destination. Resolving it against dest_path alone
                # wrongly rejects valid links such as 'a/b/l' -> '../c'.
                target_path = os.path.join(dest_path,
                                           os.path.dirname(name),
                                           member.linkname)
            else:
                # A hard link names another archive member, so its
                # target is relative to the destination itself.
                target_path = os.path.join(dest_path, member.linkname)
            target_path = os.path.realpath(target_path)
            if os.path.commonpath([target_path, dest_path]) != dest_path:
                raise LinkOutsideDestinationError(member, target_path)
    return new_attrs
def data_filter(member, dest_path):
    """Extraction filter that applies the 'data' rules to member.

    Calls _get_filtered_attrs() with for_data enabled. If any attribute
    has to change, a shallow copy of member with those changes is
    returned; otherwise member itself is returned unchanged.
    """
    changes = _get_filtered_attrs(member, dest_path, True)
    if not changes:
        return member
    return member.replace(**changes, deep=False)
def __init__(self, name=""):
    """Construct a TarInfo object. name is the optional name
       of the member.
    """
    # Name, type and link target of the member.
    self.name = name
    self.type = REGTYPE
    self.linkname = ""

    # Permissions and ownership.
    self.mode = 0o644
    self.uid = 0
    self.gid = 0
    self.uname = ""
    self.gname = ""

    # Size, modification time and header checksum.
    self.size = 0
    self.mtime = 0
    self.chksum = 0

    # Device major and minor numbers.
    self.devmajor = 0
    self.devminor = 0

    # Where the tar header and the file's data start.
    self.offset = 0
    self.offset_data = 0

    # Sparse member information and pax header information.
    self.sparse = None
    self.pax_headers = {}
def replace(self, *,
            name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP,
            uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP,
            deep=True, _KEEP=_KEEP):
    """Return a copy of self with the given attributes replaced.

    The copy is a deep copy unless deep is false, in which case a
    shallow copy is made. Arguments left at their default keep the
    value of the corresponding attribute.
    """
    result = copy.deepcopy(self) if deep else copy.copy(self)
    requested = (
        ("name", name),
        ("mtime", mtime),
        ("mode", mode),
        ("linkname", linkname),
        ("uid", uid),
        ("gid", gid),
        ("uname", uname),
        ("gname", gname),
    )
    for attribute, value in requested:
        # The sentinel marks "argument not given"; None is a real value.
        if value is not _KEEP:
            setattr(result, attribute, value)
    return result
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
    """Return a tar header as a string of 512 byte blocks.

    The header is built from get_info() by the create_*_header()
    method matching format. Raise ValueError if any value returned by
    get_info() is None or if format is not one of USTAR_FORMAT,
    GNU_FORMAT or PAX_FORMAT.
    """
    info = self.get_info()
    for field in info:
        if info[field] is None:
            raise ValueError("%s may not be None" % field)

    if format == USTAR_FORMAT:
        return self.create_ustar_header(info, encoding, errors)
    if format == GNU_FORMAT:
        return self.create_gnu_header(info, encoding, errors)
    if format == PAX_FORMAT:
        return self.create_pax_header(info, encoding)
    raise ValueError("invalid format")
def create_pax_header(self, info, encoding):
    """Return the object as a ustar header block. If it cannot be
       represented this way, prepend a pax extended header sequence
       with supplement information.
    """
    info["magic"] = POSIX_MAGIC
    extended = self.pax_headers.copy()

    # String fields go into the pax header when they are not pure
    # ASCII or are longer than the ustar field.
    text_fields = (
        ("name", "path", LENGTH_NAME),
        ("linkname", "linkpath", LENGTH_LINK),
        ("uname", "uname", 32),
        ("gname", "gname", 32),
    )
    for field, keyword, limit in text_fields:
        if keyword in extended:
            # The pax header has priority.
            continue

        text = info[field]
        try:
            text.encode("ascii", "strict")
        except UnicodeEncodeError:
            is_ascii = False
        else:
            is_ascii = True

        if not is_ascii or len(text) > limit:
            extended[keyword] = text

    # Number fields go into the pax header when they do not fit into
    # the ustar field or are floats.
    for field, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
        original = info[field]
        is_float = isinstance(original, float)
        as_int = round(original) if is_float else original
        fits = 0 <= as_int < 8 ** (digits - 1)
        if fits and not is_float:
            continue

        # The ustar header gets the rounded value, or 0 to avoid an
        # overflow; the pax header keeps the full value.
        info[field] = as_int if fits else 0

        # The existing pax header has priority.
        if field not in extended:
            extended[field] = str(original)

    # Create a pax extended header if necessary.
    prefix = b""
    if extended:
        prefix = self._create_pax_generic_header(extended, XHDTYPE, encoding)

    return prefix + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
11047db96d56Sopenharmony_ci """ 11057db96d56Sopenharmony_ci has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE) 11067db96d56Sopenharmony_ci if has_device_fields: 11077db96d56Sopenharmony_ci devmajor = itn(info.get("devmajor", 0), 8, format) 11087db96d56Sopenharmony_ci devminor = itn(info.get("devminor", 0), 8, format) 11097db96d56Sopenharmony_ci else: 11107db96d56Sopenharmony_ci devmajor = stn("", 8, encoding, errors) 11117db96d56Sopenharmony_ci devminor = stn("", 8, encoding, errors) 11127db96d56Sopenharmony_ci 11137db96d56Sopenharmony_ci # None values in metadata should cause ValueError. 11147db96d56Sopenharmony_ci # itn()/stn() do this for all fields except type. 11157db96d56Sopenharmony_ci filetype = info.get("type", REGTYPE) 11167db96d56Sopenharmony_ci if filetype is None: 11177db96d56Sopenharmony_ci raise ValueError("TarInfo.type must not be None") 11187db96d56Sopenharmony_ci 11197db96d56Sopenharmony_ci parts = [ 11207db96d56Sopenharmony_ci stn(info.get("name", ""), 100, encoding, errors), 11217db96d56Sopenharmony_ci itn(info.get("mode", 0) & 0o7777, 8, format), 11227db96d56Sopenharmony_ci itn(info.get("uid", 0), 8, format), 11237db96d56Sopenharmony_ci itn(info.get("gid", 0), 8, format), 11247db96d56Sopenharmony_ci itn(info.get("size", 0), 12, format), 11257db96d56Sopenharmony_ci itn(info.get("mtime", 0), 12, format), 11267db96d56Sopenharmony_ci b" ", # checksum field 11277db96d56Sopenharmony_ci filetype, 11287db96d56Sopenharmony_ci stn(info.get("linkname", ""), 100, encoding, errors), 11297db96d56Sopenharmony_ci info.get("magic", POSIX_MAGIC), 11307db96d56Sopenharmony_ci stn(info.get("uname", ""), 32, encoding, errors), 11317db96d56Sopenharmony_ci stn(info.get("gname", ""), 32, encoding, errors), 11327db96d56Sopenharmony_ci devmajor, 11337db96d56Sopenharmony_ci devminor, 11347db96d56Sopenharmony_ci stn(info.get("prefix", ""), 155, encoding, errors) 11357db96d56Sopenharmony_ci ] 11367db96d56Sopenharmony_ci 11377db96d56Sopenharmony_ci buf = 
@staticmethod
def _create_payload(payload):
    """Return the string payload filled with zero bytes
       up to the next 512 byte border.
    """
    overhang = len(payload) % BLOCKSIZE
    if overhang > 0:
        # Pad the last, partially filled block with NUL bytes.
        payload += (BLOCKSIZE - overhang) * NUL
    return payload
@classmethod
def _create_pax_generic_header(cls, pax_headers, type, encoding):
    """Return a POSIX.1-2008 extended or global header sequence
       that contains a list of keyword, value pairs. The values
       must be strings.
    """
    # If any value cannot be encoded as strict UTF-8 (it contains
    # surrogate characters), the header is written with
    # hdrcharset=BINARY.
    binary = False
    for value in pax_headers.values():
        try:
            value.encode("utf-8", "strict")
        except UnicodeEncodeError:
            binary = True
            break

    pieces = []
    if binary:
        # Put the hdrcharset field at the beginning of the header.
        pieces.append(b"21 hdrcharset=BINARY\n")

    for keyword, value in pax_headers.items():
        key_bytes = keyword.encode("utf-8")
        if binary:
            # Try to restore the original byte representation of the
            # value; this only works if encoding matches the string.
            value_bytes = value.encode(encoding, "surrogateescape")
        else:
            value_bytes = value.encode("utf-8")

        # The length prefix counts its own digits as well, so iterate
        # until the total no longer changes.
        base = len(key_bytes) + len(value_bytes) + 3   # ' ' + '=' + '\n'
        total = 0
        while True:
            candidate = base + len(str(total))
            if candidate == total:
                break
            total = candidate
        pieces.append(str(total).encode("ascii") + b" " + key_bytes
                      + b"=" + value_bytes + b"\n")

    records = b"".join(pieces)

    # We use a hardcoded "././@PaxHeader" name like star does
    # instead of the one that POSIX recommends.
    info = {
        "name": "././@PaxHeader",
        "type": type,
        "size": len(records),
        "magic": POSIX_MAGIC,
    }

    # Create pax header + record blocks.
    header = cls._create_header(info, USTAR_FORMAT, "ascii", "replace")
    return header + cls._create_payload(records)
12237db96d56Sopenharmony_ci """ 12247db96d56Sopenharmony_ci if len(buf) == 0: 12257db96d56Sopenharmony_ci raise EmptyHeaderError("empty header") 12267db96d56Sopenharmony_ci if len(buf) != BLOCKSIZE: 12277db96d56Sopenharmony_ci raise TruncatedHeaderError("truncated header") 12287db96d56Sopenharmony_ci if buf.count(NUL) == BLOCKSIZE: 12297db96d56Sopenharmony_ci raise EOFHeaderError("end of file header") 12307db96d56Sopenharmony_ci 12317db96d56Sopenharmony_ci chksum = nti(buf[148:156]) 12327db96d56Sopenharmony_ci if chksum not in calc_chksums(buf): 12337db96d56Sopenharmony_ci raise InvalidHeaderError("bad checksum") 12347db96d56Sopenharmony_ci 12357db96d56Sopenharmony_ci obj = cls() 12367db96d56Sopenharmony_ci obj.name = nts(buf[0:100], encoding, errors) 12377db96d56Sopenharmony_ci obj.mode = nti(buf[100:108]) 12387db96d56Sopenharmony_ci obj.uid = nti(buf[108:116]) 12397db96d56Sopenharmony_ci obj.gid = nti(buf[116:124]) 12407db96d56Sopenharmony_ci obj.size = nti(buf[124:136]) 12417db96d56Sopenharmony_ci obj.mtime = nti(buf[136:148]) 12427db96d56Sopenharmony_ci obj.chksum = chksum 12437db96d56Sopenharmony_ci obj.type = buf[156:157] 12447db96d56Sopenharmony_ci obj.linkname = nts(buf[157:257], encoding, errors) 12457db96d56Sopenharmony_ci obj.uname = nts(buf[265:297], encoding, errors) 12467db96d56Sopenharmony_ci obj.gname = nts(buf[297:329], encoding, errors) 12477db96d56Sopenharmony_ci obj.devmajor = nti(buf[329:337]) 12487db96d56Sopenharmony_ci obj.devminor = nti(buf[337:345]) 12497db96d56Sopenharmony_ci prefix = nts(buf[345:500], encoding, errors) 12507db96d56Sopenharmony_ci 12517db96d56Sopenharmony_ci # Old V7 tar format represents a directory as a regular 12527db96d56Sopenharmony_ci # file with a trailing slash. 
12537db96d56Sopenharmony_ci if obj.type == AREGTYPE and obj.name.endswith("/"): 12547db96d56Sopenharmony_ci obj.type = DIRTYPE 12557db96d56Sopenharmony_ci 12567db96d56Sopenharmony_ci # The old GNU sparse format occupies some of the unused 12577db96d56Sopenharmony_ci # space in the buffer for up to 4 sparse structures. 12587db96d56Sopenharmony_ci # Save them for later processing in _proc_sparse(). 12597db96d56Sopenharmony_ci if obj.type == GNUTYPE_SPARSE: 12607db96d56Sopenharmony_ci pos = 386 12617db96d56Sopenharmony_ci structs = [] 12627db96d56Sopenharmony_ci for i in range(4): 12637db96d56Sopenharmony_ci try: 12647db96d56Sopenharmony_ci offset = nti(buf[pos:pos + 12]) 12657db96d56Sopenharmony_ci numbytes = nti(buf[pos + 12:pos + 24]) 12667db96d56Sopenharmony_ci except ValueError: 12677db96d56Sopenharmony_ci break 12687db96d56Sopenharmony_ci structs.append((offset, numbytes)) 12697db96d56Sopenharmony_ci pos += 24 12707db96d56Sopenharmony_ci isextended = bool(buf[482]) 12717db96d56Sopenharmony_ci origsize = nti(buf[483:495]) 12727db96d56Sopenharmony_ci obj._sparse_structs = (structs, isextended, origsize) 12737db96d56Sopenharmony_ci 12747db96d56Sopenharmony_ci # Remove redundant slashes from directories. 12757db96d56Sopenharmony_ci if obj.isdir(): 12767db96d56Sopenharmony_ci obj.name = obj.name.rstrip("/") 12777db96d56Sopenharmony_ci 12787db96d56Sopenharmony_ci # Reconstruct a ustar longname. 12797db96d56Sopenharmony_ci if prefix and obj.type not in GNU_TYPES: 12807db96d56Sopenharmony_ci obj.name = prefix + "/" + obj.name 12817db96d56Sopenharmony_ci return obj 12827db96d56Sopenharmony_ci 12837db96d56Sopenharmony_ci @classmethod 12847db96d56Sopenharmony_ci def fromtarfile(cls, tarfile): 12857db96d56Sopenharmony_ci """Return the next TarInfo object from TarFile object 12867db96d56Sopenharmony_ci tarfile. 
# NOTE(review): the definitions below take `self`, so they are presumably
# methods of a class whose header lies before this chunk -- confirm and
# re-indent them when splicing them back into that class body.

#--------------------------------------------------------------------------
# The following are methods that are called depending on the type of a
# member. The entry point is _proc_member() which can be overridden in a
# subclass to add custom _proc_*() methods. A _proc_*() method MUST
# implement the following
# operations:
# 1. Set self.offset_data to the position where the data blocks begin,
#    if there is data that follows.
# 2. Set tarfile.offset to the position where the next member's header will
#    begin.
# 3. Return self or another valid TarInfo object.
def _proc_member(self, tarfile):
    """Choose the right processing method depending on
    the type and call it.
    """
    if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self._proc_gnulong(tarfile)
    elif self.type == GNUTYPE_SPARSE:
        return self._proc_sparse(tarfile)
    elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        return self._proc_pax(tarfile)
    else:
        return self._proc_builtin(tarfile)


def _proc_builtin(self, tarfile):
    """Process a builtin type or an unknown type which
    will be treated as a regular file.

    Sets self.offset_data and tarfile.offset, applies the global pax
    headers stored on `tarfile` and returns self.
    """
    self.offset_data = tarfile.fileobj.tell()
    offset = self.offset_data
    if self.isreg() or self.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        offset += self._block(self.size)
    tarfile.offset = offset

    # Patch the TarInfo object with saved global
    # header information.
    self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

    # Remove redundant slashes from directories. This is to be consistent
    # with frombuf().
    if self.isdir():
        self.name = self.name.rstrip("/")

    return self


def _proc_gnulong(self, tarfile):
    """Process the blocks that hold a GNU longname
    or longlink member.

    Returns the member described by the following header, with its
    name (GNUTYPE_LONGNAME) or linkname (GNUTYPE_LONGLINK) replaced by
    the long value.  Raises SubsequentHeaderError if that following
    header cannot be read.
    """
    buf = tarfile.fileobj.read(self._block(self.size))

    # Fetch the next header and process it.
    try:
        next_member = self.fromtarfile(tarfile)
    except HeaderError as e:
        raise SubsequentHeaderError(str(e)) from None

    # Patch the TarInfo object from the next header with
    # the longname information.
    next_member.offset = self.offset
    if self.type == GNUTYPE_LONGNAME:
        next_member.name = nts(buf, tarfile.encoding, tarfile.errors)
    elif self.type == GNUTYPE_LONGLINK:
        next_member.linkname = nts(buf, tarfile.encoding, tarfile.errors)

    # Remove redundant slashes from directories. This is to be consistent
    # with frombuf().
    if next_member.isdir():
        next_member.name = next_member.name.removesuffix("/")

    return next_member


def _proc_sparse(self, tarfile):
    """Process a GNU sparse header plus extra headers.

    Extends the structures collected by frombuf() with those found in
    extended header blocks, stores them in self.sparse, sets the
    offsets and replaces self.size with the original file size.
    """
    # We already collected some sparse structures in frombuf().
    structs, isextended, origsize = self._sparse_structs
    del self._sparse_structs

    # Collect sparse structures from extended header blocks.
    while isextended:
        buf = tarfile.fileobj.read(BLOCKSIZE)
        pos = 0
        # Each extended block carries up to 21 (offset, numbytes) entries
        # of 24 bytes; byte 504 flags whether another block follows.
        for _ in range(21):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset and numbytes:
                structs.append((offset, numbytes))
            pos += 24
        isextended = bool(buf[504])
    self.sparse = structs

    self.offset_data = tarfile.fileobj.tell()
    # self.size still holds the stored size here; it is swapped for the
    # original size only after the next header position is computed.
    tarfile.offset = self.offset_data + self._block(self.size)
    self.size = origsize
    return self


def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
    POSIX.1-2008.

    Parses the records of the header, merges them into the pax headers
    (in place on `tarfile` for a global header, into a copy otherwise),
    reads the following header and returns the member it describes.
    Raises InvalidHeaderError for malformed records and
    SubsequentHeaderError if the following header cannot be read.
    """
    # Read the header information.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A pax header stores supplemental information for either
    # the following file (extended) or all following files
    # (global).
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Parse pax header information. A record looks like that:
    # "%d %s=%s\n" % (length, keyword, value). length is the size
    # of the complete record including the length field itself and
    # the newline.
    pos = 0
    encoding = None
    raw_headers = []
    while len(buf) > pos and buf[pos] != 0x00:
        if not (match := _header_length_prefix_re.match(buf, pos)):
            raise InvalidHeaderError("invalid header")
        try:
            length = int(match.group(1))
        except ValueError:
            raise InvalidHeaderError("invalid header")
        # Headers must be at least 5 bytes, shortest being '5 x=\n'.
        # Value is allowed to be empty.
        if length < 5:
            raise InvalidHeaderError("invalid header")
        if pos + length > len(buf):
            raise InvalidHeaderError("invalid header")

        header_value_end_offset = match.start(1) + length - 1  # Last byte of the header
        keyword_and_value = buf[match.end(1) + 1:header_value_end_offset]
        raw_keyword, equals, raw_value = keyword_and_value.partition(b"=")

        # Check the framing of the header. The last character must be '\n' (0x0A)
        if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A:
            raise InvalidHeaderError("invalid header")
        raw_headers.append((length, raw_keyword, raw_value))

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails. For the time being, we don't care about
        # anything other than "BINARY". The only other value that is currently
        # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        # Note that we only follow the initial 'hdrcharset' setting to preserve
        # the initial behavior of the 'tarfile' module.
        if raw_keyword == b"hdrcharset" and encoding is None:
            if raw_value == b"BINARY":
                encoding = tarfile.encoding
            else:  # This branch ensures only the first 'hdrcharset' header is used.
                encoding = "utf-8"

        pos += length

    # If no explicit hdrcharset is set, we use UTF-8 as a default.
    if encoding is None:
        encoding = "utf-8"

    # After parsing the raw headers we can decode them to text.
    for _, raw_keyword, raw_value in raw_headers:
        # Normally, we could just use "utf-8" as the encoding and "strict"
        # as the error handler, but we better not take the risk. For
        # example, GNU tar <= 1.23 is known to store filenames it cannot
        # translate to UTF-8 as raw strings (unfortunately without a
        # hdrcharset=BINARY header).
        # We first try the strict standard encoding, and if that fails we
        # fall back on the user's encoding and error handler.
        keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8",
                                         tarfile.errors)
        if keyword in PAX_NAME_FIELDS:
            value = self._decode_pax_field(raw_value, encoding, tarfile.encoding,
                                           tarfile.errors)
        else:
            value = self._decode_pax_field(raw_value, "utf-8", "utf-8",
                                           tarfile.errors)

        pax_headers[keyword] = value

    # Fetch the next header.
    try:
        next_member = self.fromtarfile(tarfile)
    except HeaderError as e:
        raise SubsequentHeaderError(str(e)) from None

    # Process GNU sparse information.
    if "GNU.sparse.map" in pax_headers:
        # GNU extended sparse format version 0.1.
        self._proc_gnusparse_01(next_member, pax_headers)

    elif "GNU.sparse.size" in pax_headers:
        # GNU extended sparse format version 0.0.
        self._proc_gnusparse_00(next_member, raw_headers)

    elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
        # GNU extended sparse format version 1.0.
        self._proc_gnusparse_10(next_member, pax_headers, tarfile)

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        # Patch the TarInfo object with the extended header info.
        next_member._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
        next_member.offset = self.offset

        if "size" in pax_headers:
            # If the extended header replaces the size field,
            # we need to recalculate the offset where the next
            # header starts.
            offset = next_member.offset_data
            if next_member.isreg() or next_member.type not in SUPPORTED_TYPES:
                offset += next_member._block(next_member.size)
            tarfile.offset = offset

    return next_member


def _proc_gnusparse_00(self, next, raw_headers):
    """Process a GNU tar extended sparse header, version 0.0.

    `raw_headers` is the list of (length, keyword, value) tuples with
    undecoded keywords and values collected by _proc_pax().  The
    GNU.sparse.offset and GNU.sparse.numbytes values are paired up in
    order of appearance and stored in next.sparse.  Raises
    InvalidHeaderError if one of them is not an integer.
    """
    offsets = []
    numbytes = []
    for _, keyword, value in raw_headers:
        if keyword == b"GNU.sparse.offset":
            try:
                offsets.append(int(value.decode()))
            except ValueError:
                raise InvalidHeaderError("invalid header")

        elif keyword == b"GNU.sparse.numbytes":
            try:
                numbytes.append(int(value.decode()))
            except ValueError:
                raise InvalidHeaderError("invalid header")

    next.sparse = list(zip(offsets, numbytes))


def _proc_gnusparse_01(self, next, pax_headers):
    """Process a GNU tar extended sparse header, version 0.1.

    The "GNU.sparse.map" value is a comma separated list of integers
    that is split into consecutive pairs and stored in next.sparse.
    """
    sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
    next.sparse = list(zip(sparse[::2], sparse[1::2]))


def _proc_gnusparse_10(self, next, pax_headers, tarfile):
    """Process a GNU tar extended sparse header, version 1.0.

    The map is read from tarfile.fileobj as newline separated decimal
    numbers: first the number of pairs, then two numbers per pair.
    next.offset_data is set to the file position after the last block
    that was read and the pairs are stored in next.sparse.
    """
    sparse = []
    buf = tarfile.fileobj.read(BLOCKSIZE)
    fields, buf = buf.split(b"\n", 1)
    fields = int(fields)
    while len(sparse) < fields * 2:
        # Pull in another block whenever the current number is incomplete.
        if b"\n" not in buf:
            buf += tarfile.fileobj.read(BLOCKSIZE)
        number, buf = buf.split(b"\n", 1)
        sparse.append(int(number))
    next.offset_data = tarfile.fileobj.tell()
    next.sparse = list(zip(sparse[::2], sparse[1::2]))


def _apply_pax_info(self, pax_headers, encoding, errors):
    """Replace fields with supplemental information from a previous
    pax extended or global header.

    A copy of `pax_headers` is stored in self.pax_headers afterwards.
    `encoding` and `errors` are accepted but not used here.
    """
    for keyword, value in pax_headers.items():
        if keyword == "GNU.sparse.name":
            self.path = value
        elif keyword in ("GNU.sparse.size", "GNU.sparse.realsize"):
            self.size = int(value)
        elif keyword in PAX_FIELDS:
            if keyword in PAX_NUMBER_FIELDS:
                # Values that cannot be converted fall back to 0.
                try:
                    value = PAX_NUMBER_FIELDS[keyword](value)
                except ValueError:
                    value = 0
            if keyword == "path":
                value = value.rstrip("/")
            setattr(self, keyword, value)

    self.pax_headers = pax_headers.copy()
# NOTE(review): the definitions below take `self`, so they are presumably
# methods of a class whose header lies before this chunk -- confirm and
# re-indent them when splicing them back into that class body.

def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
    """Decode a single field from a pax record.

    `value` is first decoded strictly with `encoding`; if that raises
    UnicodeDecodeError it is decoded with `fallback_encoding` and the
    `fallback_errors` handler instead.
    """
    try:
        return value.decode(encoding, "strict")
    except UnicodeDecodeError:
        return value.decode(fallback_encoding, fallback_errors)


def _block(self, count):
    """Round up a byte count by BLOCKSIZE and return it,
    e.g. _block(834) => 1024.

    Raises InvalidHeaderError for a negative count.
    """
    # Only non-negative sizes are allowed; a negative count would move
    # the archive offset backwards.
    if count < 0:
        raise InvalidHeaderError("invalid offset")
    blocks, remainder = divmod(count, BLOCKSIZE)
    if remainder:
        blocks += 1
    return blocks * BLOCKSIZE


def isreg(self):
    'Return True if the Tarinfo object is a regular file.'
    return self.type in REGULAR_TYPES


def isfile(self):
    'Return True if the Tarinfo object is a regular file.'
    return self.isreg()


def isdir(self):
    'Return True if it is a directory.'
    return self.type == DIRTYPE


def issym(self):
    'Return True if it is a symbolic link.'
    return self.type == SYMTYPE


def islnk(self):
    'Return True if it is a hard link.'
    return self.type == LNKTYPE
def ischr(self):
    'Return True if it is a character device.'
    return self.type == CHRTYPE


def isblk(self):
    'Return True if it is a block device.'
    return self.type == BLKTYPE


def isfifo(self):
    'Return True if it is a FIFO.'
    return self.type == FIFOTYPE


def issparse(self):
    'Return True if sparse information has been set (self.sparse is not None).'
    return self.sparse is not None


def isdev(self):
    'Return True if it is one of character device, block device or FIFO.'
    return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo


class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The file-object for extractfile().

    extraction_filter = None    # The default filter for extraction.

    def __init__(self, name=None, mode="r", fileobj=None, format=None,
            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
            errors="surrogateescape", pax_headers=None, debug=None,
            errorlevel=None, copybufsize=None):
        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
           read from an existing archive, 'a' to append data to an existing
           file or 'w' to create a new file overwriting an existing one. `mode'
           defaults to 'r'.
           If `fileobj' is given, it is used for reading or writing data. If it
           can be determined, `mode' is overridden by `fileobj's mode.
           `fileobj' is not closed, when TarFile is closed.

           The remaining arguments override the class attributes of the
           same name when they are not None; `pax_headers' is only used
           if the format is PAX_FORMAT.  Raises ValueError for a `mode'
           other than 'r', 'a', 'w' or 'x'.
        """
        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
        if mode not in modes:
            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
        self.mode = mode
        self._mode = modes[mode]

        if not fileobj:
            if self.mode == "a" and not os.path.exists(name):
                # Create nonexistent files in append mode.
                self.mode = "w"
                self._mode = "wb"
            fileobj = bltn_open(name, self._mode)
            self._extfileobj = False
        else:
            if (name is None and hasattr(fileobj, "name") and
                isinstance(fileobj.name, (str, bytes))):
                name = fileobj.name
            if hasattr(fileobj, "mode"):
                self._mode = fileobj.mode
            # The caller owns this file object, so it is never closed here.
            self._extfileobj = True
        self.name = os.path.abspath(name) if name else None
        self.fileobj = fileobj

        # Init attributes.
        if format is not None:
            self.format = format
        if tarinfo is not None:
            self.tarinfo = tarinfo
        if dereference is not None:
            self.dereference = dereference
        if ignore_zeros is not None:
            self.ignore_zeros = ignore_zeros
        if encoding is not None:
            self.encoding = encoding
        self.errors = errors

        if pax_headers is not None and self.format == PAX_FORMAT:
            self.pax_headers = pax_headers
        else:
            self.pax_headers = {}

        if debug is not None:
            self.debug = debug
        if errorlevel is not None:
            self.errorlevel = errorlevel

        # Init datastructures.
        self.copybufsize = copybufsize
        self.closed = False
        self.members = []       # list of members as TarInfo objects
        self._loaded = False    # flag if all members have been read
        self.offset = self.fileobj.tell()
                                # current position in the archive file
        self.inodes = {}        # dictionary caching the inodes of
                                # archive members already added

        try:
            if self.mode == "r":
                self.firstmember = None
                self.firstmember = self.next()

            if self.mode == "a":
                # Move to the end of the archive,
                # before the first empty block.
                while True:
                    self.fileobj.seek(self.offset)
                    try:
                        tarinfo = self.tarinfo.fromtarfile(self)
                        self.members.append(tarinfo)
                    except EOFHeaderError:
                        self.fileobj.seek(self.offset)
                        break
                    except HeaderError as e:
                        raise ReadError(str(e)) from None

            if self.mode in ("a", "w", "x"):
                self._loaded = True

                if self.pax_headers:
                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                    self.fileobj.write(buf)
                    self.offset += len(buf)
        except BaseException:
            # Deliberately catches everything: release a file we opened
            # ourselves, mark the object closed and re-raise unchanged.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise
17617db96d56Sopenharmony_ci self.fileobj.close() 17627db96d56Sopenharmony_ci self.closed = True 17637db96d56Sopenharmony_ci raise 17647db96d56Sopenharmony_ci 17657db96d56Sopenharmony_ci #-------------------------------------------------------------------------- 17667db96d56Sopenharmony_ci # Below are the classmethods which act as alternate constructors to the 17677db96d56Sopenharmony_ci # TarFile class. The open() method is the only one that is needed for 17687db96d56Sopenharmony_ci # public use; it is the "super"-constructor and is able to select an 17697db96d56Sopenharmony_ci # adequate "sub"-constructor for a particular compression using the mapping 17707db96d56Sopenharmony_ci # from OPEN_METH. 17717db96d56Sopenharmony_ci # 17727db96d56Sopenharmony_ci # This concept allows one to subclass TarFile without losing the comfort of 17737db96d56Sopenharmony_ci # the super-constructor. A sub-constructor is registered and made available 17747db96d56Sopenharmony_ci # by adding it to the mapping in OPEN_METH. 17757db96d56Sopenharmony_ci 17767db96d56Sopenharmony_ci @classmethod 17777db96d56Sopenharmony_ci def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs): 17787db96d56Sopenharmony_ci """Open a tar archive for reading, writing or appending. Return 17797db96d56Sopenharmony_ci an appropriate TarFile class. 
17807db96d56Sopenharmony_ci 17817db96d56Sopenharmony_ci mode: 17827db96d56Sopenharmony_ci 'r' or 'r:*' open for reading with transparent compression 17837db96d56Sopenharmony_ci 'r:' open for reading exclusively uncompressed 17847db96d56Sopenharmony_ci 'r:gz' open for reading with gzip compression 17857db96d56Sopenharmony_ci 'r:bz2' open for reading with bzip2 compression 17867db96d56Sopenharmony_ci 'r:xz' open for reading with lzma compression 17877db96d56Sopenharmony_ci 'a' or 'a:' open for appending, creating the file if necessary 17887db96d56Sopenharmony_ci 'w' or 'w:' open for writing without compression 17897db96d56Sopenharmony_ci 'w:gz' open for writing with gzip compression 17907db96d56Sopenharmony_ci 'w:bz2' open for writing with bzip2 compression 17917db96d56Sopenharmony_ci 'w:xz' open for writing with lzma compression 17927db96d56Sopenharmony_ci 17937db96d56Sopenharmony_ci 'x' or 'x:' create a tarfile exclusively without compression, raise 17947db96d56Sopenharmony_ci an exception if the file is already created 17957db96d56Sopenharmony_ci 'x:gz' create a gzip compressed tarfile, raise an exception 17967db96d56Sopenharmony_ci if the file is already created 17977db96d56Sopenharmony_ci 'x:bz2' create a bzip2 compressed tarfile, raise an exception 17987db96d56Sopenharmony_ci if the file is already created 17997db96d56Sopenharmony_ci 'x:xz' create an lzma compressed tarfile, raise an exception 18007db96d56Sopenharmony_ci if the file is already created 18017db96d56Sopenharmony_ci 18027db96d56Sopenharmony_ci 'r|*' open a stream of tar blocks with transparent compression 18037db96d56Sopenharmony_ci 'r|' open an uncompressed stream of tar blocks for reading 18047db96d56Sopenharmony_ci 'r|gz' open a gzip compressed stream of tar blocks 18057db96d56Sopenharmony_ci 'r|bz2' open a bzip2 compressed stream of tar blocks 18067db96d56Sopenharmony_ci 'r|xz' open an lzma compressed stream of tar blocks 18077db96d56Sopenharmony_ci 'w|' open an uncompressed stream for writing 
18087db96d56Sopenharmony_ci 'w|gz' open a gzip compressed stream for writing 18097db96d56Sopenharmony_ci 'w|bz2' open a bzip2 compressed stream for writing 18107db96d56Sopenharmony_ci 'w|xz' open an lzma compressed stream for writing 18117db96d56Sopenharmony_ci """ 18127db96d56Sopenharmony_ci 18137db96d56Sopenharmony_ci if not name and not fileobj: 18147db96d56Sopenharmony_ci raise ValueError("nothing to open") 18157db96d56Sopenharmony_ci 18167db96d56Sopenharmony_ci if mode in ("r", "r:*"): 18177db96d56Sopenharmony_ci # Find out which *open() is appropriate for opening the file. 18187db96d56Sopenharmony_ci def not_compressed(comptype): 18197db96d56Sopenharmony_ci return cls.OPEN_METH[comptype] == 'taropen' 18207db96d56Sopenharmony_ci error_msgs = [] 18217db96d56Sopenharmony_ci for comptype in sorted(cls.OPEN_METH, key=not_compressed): 18227db96d56Sopenharmony_ci func = getattr(cls, cls.OPEN_METH[comptype]) 18237db96d56Sopenharmony_ci if fileobj is not None: 18247db96d56Sopenharmony_ci saved_pos = fileobj.tell() 18257db96d56Sopenharmony_ci try: 18267db96d56Sopenharmony_ci return func(name, "r", fileobj, **kwargs) 18277db96d56Sopenharmony_ci except (ReadError, CompressionError) as e: 18287db96d56Sopenharmony_ci error_msgs.append(f'- method {comptype}: {e!r}') 18297db96d56Sopenharmony_ci if fileobj is not None: 18307db96d56Sopenharmony_ci fileobj.seek(saved_pos) 18317db96d56Sopenharmony_ci continue 18327db96d56Sopenharmony_ci error_msgs_summary = '\n'.join(error_msgs) 18337db96d56Sopenharmony_ci raise ReadError(f"file could not be opened successfully:\n{error_msgs_summary}") 18347db96d56Sopenharmony_ci 18357db96d56Sopenharmony_ci elif ":" in mode: 18367db96d56Sopenharmony_ci filemode, comptype = mode.split(":", 1) 18377db96d56Sopenharmony_ci filemode = filemode or "r" 18387db96d56Sopenharmony_ci comptype = comptype or "tar" 18397db96d56Sopenharmony_ci 18407db96d56Sopenharmony_ci # Select the *open() function according to 18417db96d56Sopenharmony_ci # given 
compression. 18427db96d56Sopenharmony_ci if comptype in cls.OPEN_METH: 18437db96d56Sopenharmony_ci func = getattr(cls, cls.OPEN_METH[comptype]) 18447db96d56Sopenharmony_ci else: 18457db96d56Sopenharmony_ci raise CompressionError("unknown compression type %r" % comptype) 18467db96d56Sopenharmony_ci return func(name, filemode, fileobj, **kwargs) 18477db96d56Sopenharmony_ci 18487db96d56Sopenharmony_ci elif "|" in mode: 18497db96d56Sopenharmony_ci filemode, comptype = mode.split("|", 1) 18507db96d56Sopenharmony_ci filemode = filemode or "r" 18517db96d56Sopenharmony_ci comptype = comptype or "tar" 18527db96d56Sopenharmony_ci 18537db96d56Sopenharmony_ci if filemode not in ("r", "w"): 18547db96d56Sopenharmony_ci raise ValueError("mode must be 'r' or 'w'") 18557db96d56Sopenharmony_ci 18567db96d56Sopenharmony_ci stream = _Stream(name, filemode, comptype, fileobj, bufsize) 18577db96d56Sopenharmony_ci try: 18587db96d56Sopenharmony_ci t = cls(name, filemode, stream, **kwargs) 18597db96d56Sopenharmony_ci except: 18607db96d56Sopenharmony_ci stream.close() 18617db96d56Sopenharmony_ci raise 18627db96d56Sopenharmony_ci t._extfileobj = False 18637db96d56Sopenharmony_ci return t 18647db96d56Sopenharmony_ci 18657db96d56Sopenharmony_ci elif mode in ("a", "w", "x"): 18667db96d56Sopenharmony_ci return cls.taropen(name, mode, fileobj, **kwargs) 18677db96d56Sopenharmony_ci 18687db96d56Sopenharmony_ci raise ValueError("undiscernible mode") 18697db96d56Sopenharmony_ci 18707db96d56Sopenharmony_ci @classmethod 18717db96d56Sopenharmony_ci def taropen(cls, name, mode="r", fileobj=None, **kwargs): 18727db96d56Sopenharmony_ci """Open uncompressed tar archive name for reading or writing. 
18737db96d56Sopenharmony_ci """ 18747db96d56Sopenharmony_ci if mode not in ("r", "a", "w", "x"): 18757db96d56Sopenharmony_ci raise ValueError("mode must be 'r', 'a', 'w' or 'x'") 18767db96d56Sopenharmony_ci return cls(name, mode, fileobj, **kwargs) 18777db96d56Sopenharmony_ci 18787db96d56Sopenharmony_ci @classmethod 18797db96d56Sopenharmony_ci def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): 18807db96d56Sopenharmony_ci """Open gzip compressed tar archive name for reading or writing. 18817db96d56Sopenharmony_ci Appending is not allowed. 18827db96d56Sopenharmony_ci """ 18837db96d56Sopenharmony_ci if mode not in ("r", "w", "x"): 18847db96d56Sopenharmony_ci raise ValueError("mode must be 'r', 'w' or 'x'") 18857db96d56Sopenharmony_ci 18867db96d56Sopenharmony_ci try: 18877db96d56Sopenharmony_ci from gzip import GzipFile 18887db96d56Sopenharmony_ci except ImportError: 18897db96d56Sopenharmony_ci raise CompressionError("gzip module is not available") from None 18907db96d56Sopenharmony_ci 18917db96d56Sopenharmony_ci try: 18927db96d56Sopenharmony_ci fileobj = GzipFile(name, mode + "b", compresslevel, fileobj) 18937db96d56Sopenharmony_ci except OSError as e: 18947db96d56Sopenharmony_ci if fileobj is not None and mode == 'r': 18957db96d56Sopenharmony_ci raise ReadError("not a gzip file") from e 18967db96d56Sopenharmony_ci raise 18977db96d56Sopenharmony_ci 18987db96d56Sopenharmony_ci try: 18997db96d56Sopenharmony_ci t = cls.taropen(name, mode, fileobj, **kwargs) 19007db96d56Sopenharmony_ci except OSError as e: 19017db96d56Sopenharmony_ci fileobj.close() 19027db96d56Sopenharmony_ci if mode == 'r': 19037db96d56Sopenharmony_ci raise ReadError("not a gzip file") from e 19047db96d56Sopenharmony_ci raise 19057db96d56Sopenharmony_ci except: 19067db96d56Sopenharmony_ci fileobj.close() 19077db96d56Sopenharmony_ci raise 19087db96d56Sopenharmony_ci t._extfileobj = False 19097db96d56Sopenharmony_ci return t 19107db96d56Sopenharmony_ci 19117db96d56Sopenharmony_ci 
# NOTE(review): the definitions below are members of the TarFile class. The
# surrounding listing lost its indentation, so they are written flush-left
# here; re-indent by one level when splicing them back into the class body.

@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open a bzip2 compressed tar archive for reading or writing.

    Appending is not allowed.  `fileobj', when given, takes precedence
    over `name' as the source/target of the compressed stream.

    Raises ValueError for an unsupported mode, CompressionError if the
    bz2 module cannot be imported, and ReadError if the data cannot be
    read as a bzip2 compressed tar archive in mode 'r'.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        from bz2 import BZ2File
    except ImportError:
        raise CompressionError("bz2 module is not available") from None

    fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (OSError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not a bzip2 file") from e
        raise
    except:
        # Never leak the BZ2File wrapper, whatever went wrong.
        fileobj.close()
        raise
    # The BZ2File wrapper was created here, so close() must close it.
    t._extfileobj = False
    return t

@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open an lzma compressed tar archive for reading or writing.

    Appending is not allowed.  `fileobj', when given, takes precedence
    over `name' as the source/target of the compressed stream.

    Raises ValueError for an unsupported mode, CompressionError if the
    lzma module cannot be imported, and ReadError if the data cannot be
    read as an lzma compressed tar archive in mode 'r'.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        from lzma import LZMAFile, LZMAError
    except ImportError:
        raise CompressionError("lzma module is not available") from None

    fileobj = LZMAFile(fileobj or name, mode, preset=preset)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (LZMAError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not an lzma file") from e
        raise
    except:
        # Never leak the LZMAFile wrapper, whatever went wrong.
        fileobj.close()
        raise
    # The LZMAFile wrapper was created here, so close() must close it.
    t._extfileobj = False
    return t

# All *open() methods are registered here.
# Maps a compression name to the name of the classmethod that opens it.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open",   # bzip2 compressed tar
    "xz":  "xzopen"     # lzma compressed tar
}

#--------------------------------------------------------------------------
# The public methods which TarFile provides:

def close(self):
    """Close the TarFile.

    In write-mode ("a", "w" or "x"), two finishing zero blocks are
    appended to the archive and the output is padded with zeros up to a
    multiple of RECORDSIZE.  The underlying file object is closed only
    if it is not an external one (self._extfileobj is false).
    Calling close() on an already closed TarFile does nothing.
    """
    if self.closed:
        return

    # Mark as closed first so a failing write cannot trigger a second attempt.
    self.closed = True
    try:
        if self.mode in ("a", "w", "x"):
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        if not self._extfileobj:
            self.fileobj.close()

def getmember(self, name):
    """Return a TarInfo object for member `name'.

    Trailing slashes in `name' are ignored for the lookup.  If `name'
    can not be found in the archive, KeyError is raised.  If a member
    occurs more than once in the archive, its last occurrence is assumed
    to be the most up-to-date version.
    """
    tarinfo = self._getmember(name.rstrip('/'))
    if tarinfo is None:
        raise KeyError("filename %r not found" % name)
    return tarinfo

def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects.

    The list has the same order as the members in the archive.
    """
    self._check()
    if not self._loaded:    # if we want to obtain a list of
        self._load()        # all members, we first have to
                            # scan the whole archive.
    return self.members

def getnames(self):
    """Return the members of the archive as a list of their names.

    The list has the same order as the list returned by getmembers().
    """
    return [tarinfo.name for tarinfo in self.getmembers()]

def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object from the result of os.stat or equivalent
    on an existing file.

    The file is either named by `name', or specified as a file object
    `fileobj' with a file descriptor.  If given, `arcname' specifies an
    alternative name for the file in the archive, otherwise, the name is
    taken from the 'name' attribute of 'fileobj', or the 'name'
    argument.  The name should be a text string.

    Returns None if the file is of a type that is not one of: regular
    file, directory, fifo, symbolic link, character or block device.
    """
    self._check("awx")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.replace(os.sep, "/")
    arcname = arcname.lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self  # Not needed

    # Use os.stat or os.lstat, depending on if symlinks shall be resolved.
    if fileobj is None:
        if not self.dereference:
            statres = os.lstat(name)
        else:
            statres = os.stat(name)
    else:
        statres = os.fstat(fileobj.fileno())
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        if not self.dereference and statres.st_nlink > 1 and \
                inode in self.inodes and arcname != self.inodes[inode]:
            # Is it a hardlink to an already
            # archived file?
            ftype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            ftype = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        ftype = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        ftype = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        ftype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        ftype = CHRTYPE
    elif stat.S_ISBLK(stmd):
        ftype = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data; every other type is stored with size 0.
    if ftype == REGTYPE:
        tarinfo.size = statres.st_size
    else:
        tarinfo.size = 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = ftype
    tarinfo.linkname = linkname
    # pwd/grp are None when the modules are unavailable; an unknown id
    # simply leaves the corresponding name at its default.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if ftype in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo

def list(self, verbose=True, *, members=None):
    """Print a table of contents to sys.stdout.

    If `verbose' is False, only the names of the members are printed.
    If it is True, an `ls -l'-like output is produced.  `members' is
    optional and must be a subset of the list returned by getmembers();
    when omitted, the archive itself is iterated.
    """
    self._check()

    if members is None:
        members = self
    for tarinfo in members:
        if verbose:
            # mode and mtime may be None; print placeholders in that case.
            if tarinfo.mode is None:
                _safe_print("??????????")
            else:
                _safe_print(stat.filemode(tarinfo.mode))
            _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
                                   tarinfo.gname or tarinfo.gid))
            if tarinfo.ischr() or tarinfo.isblk():
                _safe_print("%10s" %
                        ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
            else:
                _safe_print("%10d" % tarinfo.size)
            if tarinfo.mtime is None:
                _safe_print("????-??-?? ??:??:??")
            else:
                _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
                            % time.localtime(tarinfo.mtime)[:6])

        _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))

        if verbose:
            if tarinfo.issym():
                _safe_print("-> " + tarinfo.linkname)
            if tarinfo.islnk():
                _safe_print("link to " + tarinfo.linkname)
        print()

def add(self, name, arcname=None, recursive=True, *, filter=None):
    """Add the file `name' to the archive.

    `name' may be any type of file (directory, fifo, symbolic link,
    etc.).  If given, `arcname' specifies an alternative name for the
    file in the archive.  Directories are added recursively by default,
    with their entries visited in sorted order.  This can be avoided by
    setting `recursive' to False.  `filter' is a function that expects a
    TarInfo object argument and returns the changed TarInfo object, if
    it returns None the TarInfo object will be excluded from the
    archive.
    """
    self._check("awx")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as f:
            self.addfile(tarinfo, f)

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in sorted(os.listdir(name)):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                        recursive, filter=filter)

    else:
        self.addfile(tarinfo)

def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive.

    If `fileobj' is given, it should be a binary file, and tarinfo.size
    bytes are read from it and added to the archive, padded with zeros
    to a multiple of BLOCKSIZE.  You can create TarInfo objects
    directly, or by using gettarinfo().  A shallow copy of `tarinfo' is
    what gets recorded in self.members.
    """
    self._check("awx")

    tarinfo = copy.copy(tarinfo)

    buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(buf)
    self.offset += len(buf)
    bufsize = self.copybufsize
    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
        blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
        if remainder > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(tarinfo)

def _get_filter_function(self, filter):
    """Resolve the `filter' argument of extract()/extractall().

    With `filter' None, self.extraction_filter is used; if that is None
    as well, fully_trusted_filter is returned.  A callable is returned
    unchanged; anything else is looked up in _NAMED_FILTERS.

    Raises TypeError if self.extraction_filter is a string, and
    ValueError if `filter' is not a known filter name.
    """
    if filter is None:
        filter = self.extraction_filter
        if filter is None:
            return fully_trusted_filter
        if isinstance(filter, str):
            raise TypeError(
                'String names are not supported for '
                + 'TarFile.extraction_filter. Use a function such as '
                + 'tarfile.data_filter directly.')
        return filter
    if callable(filter):
        return filter
    try:
        return _NAMED_FILTERS[filter]
    except KeyError:
        raise ValueError(f"filter {filter!r} not found") from None

def extractall(self, path=".", members=None, *, numeric_owner=False,
               filter=None):
    """Extract all members from the archive to the current working
    directory and set owner, modification time and permissions on
    directories afterwards.

    `path' specifies a different directory to extract to.  `members' is
    optional and must be a subset of the list returned by getmembers().
    If `numeric_owner` is True, only the numbers for user/group names
    are used and not the names.

    The `filter` function will be called on each member just before
    extraction.  It can return a changed TarInfo or None to skip the
    member.  String names of common filters are accepted.
    """
    directories = []

    filter_function = self._get_filter_function(filter)
    if members is None:
        members = self

    for member in members:
        tarinfo = self._get_extract_tarinfo(member, filter_function, path)
        if tarinfo is None:
            continue
        if tarinfo.isdir():
            # For directories, delay setting attributes until later,
            # since permissions can interfere with extraction and
            # extracting contents can reset mtime.
            directories.append(tarinfo)
        self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(),
                          numeric_owner=numeric_owner)

    # Reverse sort directories.
    directories.sort(key=lambda a: a.name, reverse=True)

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            self._handle_nonfatal_error(e)

def extract(self, member, path="", set_attrs=True, *, numeric_owner=False,
            filter=None):
    """Extract a member from the archive to the current working
    directory, using its full name.

    Its file information is extracted as accurately as possible.
    `member' may be a filename or a TarInfo object.  You can specify a
    different directory using `path'.  File attributes (owner, mtime,
    mode) are set unless `set_attrs' is False.  If `numeric_owner` is
    True, only the numbers for user/group names are used and not the
    names.

    The `filter` function will be called before extraction.  It can
    return a changed TarInfo or None to skip the member.  String names
    of common filters are accepted.
    """
    filter_function = self._get_filter_function(filter)
    tarinfo = self._get_extract_tarinfo(member, filter_function, path)
    if tarinfo is not None:
        self._extract_one(tarinfo, path, set_attrs, numeric_owner)

def _get_extract_tarinfo(self, member, filter_function, path):
    """Get filtered TarInfo (or None) from member, which might be a str.

    Returns None when the member is to be skipped: either the filter
    returned None, or the filter raised and the error was only logged
    because of a low self.errorlevel.
    """
    if isinstance(member, str):
        unfiltered = self.getmember(member)
    else:
        unfiltered = member

    # Default to "skip".  If the filter raises and the handler below merely
    # logs the error, the rejected member must not fall through and be
    # extracted in its unfiltered form.
    tarinfo = None
    try:
        tarinfo = filter_function(unfiltered, path)
    except (OSError, FilterError) as e:
        self._handle_fatal_error(e)
    except ExtractError as e:
        self._handle_nonfatal_error(e)
    if tarinfo is None:
        self._dbg(2, "tarfile: Excluded %r" % unfiltered.name)
        return None
    # Prepare the link target for makelink().
    if tarinfo.islnk():
        # Work on a copy so the caller's TarInfo is not modified.
        tarinfo = copy.copy(tarinfo)
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)
    return tarinfo

def _extract_one(self, tarinfo, path, set_attrs, numeric_owner):
    """Extract from filtered tarinfo to disk.

    The target is `path' joined with tarinfo.name.  OSError is routed
    to _handle_fatal_error() and ExtractError to
    _handle_nonfatal_error(), which decide whether to re-raise.
    """
    self._check("r")

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                             set_attrs=set_attrs,
                             numeric_owner=numeric_owner)
    except OSError as e:
        self._handle_fatal_error(e)
    except ExtractError as e:
        self._handle_nonfatal_error(e)

def _handle_nonfatal_error(self, e):
    """Handle non-fatal error (ExtractError) according to errorlevel.

    Must be called from inside an `except' block: the bare `raise'
    re-raises the exception currently being handled.
    """
    if self.errorlevel > 1:
        raise
    else:
        self._dbg(1, "tarfile: %s" % e)

def _handle_fatal_error(self, e):
    """Handle "fatal" error according to self.errorlevel.

    Must be called from inside an `except' block: the bare `raise'
    re-raises the exception currently being handled.  With errorlevel 0
    the error is only reported through _dbg().
    """
    if self.errorlevel > 0:
        raise
    elif isinstance(e, OSError):
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    else:
        self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e))
23757db96d56Sopenharmony_ci 23767db96d56Sopenharmony_ci def extractfile(self, member): 23777db96d56Sopenharmony_ci """Extract a member from the archive as a file object. `member' may be 23787db96d56Sopenharmony_ci a filename or a TarInfo object. If `member' is a regular file or 23797db96d56Sopenharmony_ci a link, an io.BufferedReader object is returned. For all other 23807db96d56Sopenharmony_ci existing members, None is returned. If `member' does not appear 23817db96d56Sopenharmony_ci in the archive, KeyError is raised. 23827db96d56Sopenharmony_ci """ 23837db96d56Sopenharmony_ci self._check("r") 23847db96d56Sopenharmony_ci 23857db96d56Sopenharmony_ci if isinstance(member, str): 23867db96d56Sopenharmony_ci tarinfo = self.getmember(member) 23877db96d56Sopenharmony_ci else: 23887db96d56Sopenharmony_ci tarinfo = member 23897db96d56Sopenharmony_ci 23907db96d56Sopenharmony_ci if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES: 23917db96d56Sopenharmony_ci # Members with unknown types are treated as regular files. 23927db96d56Sopenharmony_ci return self.fileobject(self, tarinfo) 23937db96d56Sopenharmony_ci 23947db96d56Sopenharmony_ci elif tarinfo.islnk() or tarinfo.issym(): 23957db96d56Sopenharmony_ci if isinstance(self.fileobj, _Stream): 23967db96d56Sopenharmony_ci # A small but ugly workaround for the case that someone tries 23977db96d56Sopenharmony_ci # to extract a (sym)link as a file-object from a non-seekable 23987db96d56Sopenharmony_ci # stream of tar blocks. 23997db96d56Sopenharmony_ci raise StreamError("cannot extract (sym)link as file object") 24007db96d56Sopenharmony_ci else: 24017db96d56Sopenharmony_ci # A (sym)link's file object is its target's file object. 24027db96d56Sopenharmony_ci return self.extractfile(self._find_link_target(tarinfo)) 24037db96d56Sopenharmony_ci else: 24047db96d56Sopenharmony_ci # If there's no data associated with the member (directory, chrdev, 24057db96d56Sopenharmony_ci # blkdev, etc.), return None instead of a file object. 
24067db96d56Sopenharmony_ci return None 24077db96d56Sopenharmony_ci 24087db96d56Sopenharmony_ci def _extract_member(self, tarinfo, targetpath, set_attrs=True, 24097db96d56Sopenharmony_ci numeric_owner=False): 24107db96d56Sopenharmony_ci """Extract the TarInfo object tarinfo to a physical 24117db96d56Sopenharmony_ci file called targetpath. 24127db96d56Sopenharmony_ci """ 24137db96d56Sopenharmony_ci # Fetch the TarInfo object for the given name 24147db96d56Sopenharmony_ci # and build the destination pathname, replacing 24157db96d56Sopenharmony_ci # forward slashes to platform specific separators. 24167db96d56Sopenharmony_ci targetpath = targetpath.rstrip("/") 24177db96d56Sopenharmony_ci targetpath = targetpath.replace("/", os.sep) 24187db96d56Sopenharmony_ci 24197db96d56Sopenharmony_ci # Create all upper directories. 24207db96d56Sopenharmony_ci upperdirs = os.path.dirname(targetpath) 24217db96d56Sopenharmony_ci if upperdirs and not os.path.exists(upperdirs): 24227db96d56Sopenharmony_ci # Create directories that are not part of the archive with 24237db96d56Sopenharmony_ci # default permissions. 
24247db96d56Sopenharmony_ci os.makedirs(upperdirs) 24257db96d56Sopenharmony_ci 24267db96d56Sopenharmony_ci if tarinfo.islnk() or tarinfo.issym(): 24277db96d56Sopenharmony_ci self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) 24287db96d56Sopenharmony_ci else: 24297db96d56Sopenharmony_ci self._dbg(1, tarinfo.name) 24307db96d56Sopenharmony_ci 24317db96d56Sopenharmony_ci if tarinfo.isreg(): 24327db96d56Sopenharmony_ci self.makefile(tarinfo, targetpath) 24337db96d56Sopenharmony_ci elif tarinfo.isdir(): 24347db96d56Sopenharmony_ci self.makedir(tarinfo, targetpath) 24357db96d56Sopenharmony_ci elif tarinfo.isfifo(): 24367db96d56Sopenharmony_ci self.makefifo(tarinfo, targetpath) 24377db96d56Sopenharmony_ci elif tarinfo.ischr() or tarinfo.isblk(): 24387db96d56Sopenharmony_ci self.makedev(tarinfo, targetpath) 24397db96d56Sopenharmony_ci elif tarinfo.islnk() or tarinfo.issym(): 24407db96d56Sopenharmony_ci self.makelink(tarinfo, targetpath) 24417db96d56Sopenharmony_ci elif tarinfo.type not in SUPPORTED_TYPES: 24427db96d56Sopenharmony_ci self.makeunknown(tarinfo, targetpath) 24437db96d56Sopenharmony_ci else: 24447db96d56Sopenharmony_ci self.makefile(tarinfo, targetpath) 24457db96d56Sopenharmony_ci 24467db96d56Sopenharmony_ci if set_attrs: 24477db96d56Sopenharmony_ci self.chown(tarinfo, targetpath, numeric_owner) 24487db96d56Sopenharmony_ci if not tarinfo.issym(): 24497db96d56Sopenharmony_ci self.chmod(tarinfo, targetpath) 24507db96d56Sopenharmony_ci self.utime(tarinfo, targetpath) 24517db96d56Sopenharmony_ci 24527db96d56Sopenharmony_ci #-------------------------------------------------------------------------- 24537db96d56Sopenharmony_ci # Below are the different file methods. They are called via 24547db96d56Sopenharmony_ci # _extract_member() when extract() is called. They can be replaced in a 24557db96d56Sopenharmony_ci # subclass to implement other functionality. 
24567db96d56Sopenharmony_ci 24577db96d56Sopenharmony_ci def makedir(self, tarinfo, targetpath): 24587db96d56Sopenharmony_ci """Make a directory called targetpath. 24597db96d56Sopenharmony_ci """ 24607db96d56Sopenharmony_ci try: 24617db96d56Sopenharmony_ci if tarinfo.mode is None: 24627db96d56Sopenharmony_ci # Use the system's default mode 24637db96d56Sopenharmony_ci os.mkdir(targetpath) 24647db96d56Sopenharmony_ci else: 24657db96d56Sopenharmony_ci # Use a safe mode for the directory, the real mode is set 24667db96d56Sopenharmony_ci # later in _extract_member(). 24677db96d56Sopenharmony_ci os.mkdir(targetpath, 0o700) 24687db96d56Sopenharmony_ci except FileExistsError: 24697db96d56Sopenharmony_ci pass 24707db96d56Sopenharmony_ci 24717db96d56Sopenharmony_ci def makefile(self, tarinfo, targetpath): 24727db96d56Sopenharmony_ci """Make a file called targetpath. 24737db96d56Sopenharmony_ci """ 24747db96d56Sopenharmony_ci source = self.fileobj 24757db96d56Sopenharmony_ci source.seek(tarinfo.offset_data) 24767db96d56Sopenharmony_ci bufsize = self.copybufsize 24777db96d56Sopenharmony_ci with bltn_open(targetpath, "wb") as target: 24787db96d56Sopenharmony_ci if tarinfo.sparse is not None: 24797db96d56Sopenharmony_ci for offset, size in tarinfo.sparse: 24807db96d56Sopenharmony_ci target.seek(offset) 24817db96d56Sopenharmony_ci copyfileobj(source, target, size, ReadError, bufsize) 24827db96d56Sopenharmony_ci target.seek(tarinfo.size) 24837db96d56Sopenharmony_ci target.truncate() 24847db96d56Sopenharmony_ci else: 24857db96d56Sopenharmony_ci copyfileobj(source, target, tarinfo.size, ReadError, bufsize) 24867db96d56Sopenharmony_ci 24877db96d56Sopenharmony_ci def makeunknown(self, tarinfo, targetpath): 24887db96d56Sopenharmony_ci """Make a file from a TarInfo object with an unknown type 24897db96d56Sopenharmony_ci at targetpath. 
24907db96d56Sopenharmony_ci """ 24917db96d56Sopenharmony_ci self.makefile(tarinfo, targetpath) 24927db96d56Sopenharmony_ci self._dbg(1, "tarfile: Unknown file type %r, " \ 24937db96d56Sopenharmony_ci "extracted as regular file." % tarinfo.type) 24947db96d56Sopenharmony_ci 24957db96d56Sopenharmony_ci def makefifo(self, tarinfo, targetpath): 24967db96d56Sopenharmony_ci """Make a fifo called targetpath. 24977db96d56Sopenharmony_ci """ 24987db96d56Sopenharmony_ci if hasattr(os, "mkfifo"): 24997db96d56Sopenharmony_ci os.mkfifo(targetpath) 25007db96d56Sopenharmony_ci else: 25017db96d56Sopenharmony_ci raise ExtractError("fifo not supported by system") 25027db96d56Sopenharmony_ci 25037db96d56Sopenharmony_ci def makedev(self, tarinfo, targetpath): 25047db96d56Sopenharmony_ci """Make a character or block device called targetpath. 25057db96d56Sopenharmony_ci """ 25067db96d56Sopenharmony_ci if not hasattr(os, "mknod") or not hasattr(os, "makedev"): 25077db96d56Sopenharmony_ci raise ExtractError("special devices not supported by system") 25087db96d56Sopenharmony_ci 25097db96d56Sopenharmony_ci mode = tarinfo.mode 25107db96d56Sopenharmony_ci if mode is None: 25117db96d56Sopenharmony_ci # Use mknod's default 25127db96d56Sopenharmony_ci mode = 0o600 25137db96d56Sopenharmony_ci if tarinfo.isblk(): 25147db96d56Sopenharmony_ci mode |= stat.S_IFBLK 25157db96d56Sopenharmony_ci else: 25167db96d56Sopenharmony_ci mode |= stat.S_IFCHR 25177db96d56Sopenharmony_ci 25187db96d56Sopenharmony_ci os.mknod(targetpath, mode, 25197db96d56Sopenharmony_ci os.makedev(tarinfo.devmajor, tarinfo.devminor)) 25207db96d56Sopenharmony_ci 25217db96d56Sopenharmony_ci def makelink(self, tarinfo, targetpath): 25227db96d56Sopenharmony_ci """Make a (symbolic) link called targetpath. If it cannot be created 25237db96d56Sopenharmony_ci (platform limitation), we try to make a copy of the referenced file 25247db96d56Sopenharmony_ci instead of a link. 
25257db96d56Sopenharmony_ci """ 25267db96d56Sopenharmony_ci try: 25277db96d56Sopenharmony_ci # For systems that support symbolic and hard links. 25287db96d56Sopenharmony_ci if tarinfo.issym(): 25297db96d56Sopenharmony_ci if os.path.lexists(targetpath): 25307db96d56Sopenharmony_ci # Avoid FileExistsError on following os.symlink. 25317db96d56Sopenharmony_ci os.unlink(targetpath) 25327db96d56Sopenharmony_ci os.symlink(tarinfo.linkname, targetpath) 25337db96d56Sopenharmony_ci else: 25347db96d56Sopenharmony_ci if os.path.exists(tarinfo._link_target): 25357db96d56Sopenharmony_ci os.link(tarinfo._link_target, targetpath) 25367db96d56Sopenharmony_ci else: 25377db96d56Sopenharmony_ci self._extract_member(self._find_link_target(tarinfo), 25387db96d56Sopenharmony_ci targetpath) 25397db96d56Sopenharmony_ci except symlink_exception: 25407db96d56Sopenharmony_ci try: 25417db96d56Sopenharmony_ci self._extract_member(self._find_link_target(tarinfo), 25427db96d56Sopenharmony_ci targetpath) 25437db96d56Sopenharmony_ci except KeyError: 25447db96d56Sopenharmony_ci raise ExtractError("unable to resolve link inside archive") from None 25457db96d56Sopenharmony_ci 25467db96d56Sopenharmony_ci def chown(self, tarinfo, targetpath, numeric_owner): 25477db96d56Sopenharmony_ci """Set owner of targetpath according to tarinfo. If numeric_owner 25487db96d56Sopenharmony_ci is True, use .gid/.uid instead of .gname/.uname. If numeric_owner 25497db96d56Sopenharmony_ci is False, fall back to .gid/.uid when the search based on name 25507db96d56Sopenharmony_ci fails. 25517db96d56Sopenharmony_ci """ 25527db96d56Sopenharmony_ci if hasattr(os, "geteuid") and os.geteuid() == 0: 25537db96d56Sopenharmony_ci # We have to be root to do so. 
25547db96d56Sopenharmony_ci g = tarinfo.gid 25557db96d56Sopenharmony_ci u = tarinfo.uid 25567db96d56Sopenharmony_ci if not numeric_owner: 25577db96d56Sopenharmony_ci try: 25587db96d56Sopenharmony_ci if grp and tarinfo.gname: 25597db96d56Sopenharmony_ci g = grp.getgrnam(tarinfo.gname)[2] 25607db96d56Sopenharmony_ci except KeyError: 25617db96d56Sopenharmony_ci pass 25627db96d56Sopenharmony_ci try: 25637db96d56Sopenharmony_ci if pwd and tarinfo.uname: 25647db96d56Sopenharmony_ci u = pwd.getpwnam(tarinfo.uname)[2] 25657db96d56Sopenharmony_ci except KeyError: 25667db96d56Sopenharmony_ci pass 25677db96d56Sopenharmony_ci if g is None: 25687db96d56Sopenharmony_ci g = -1 25697db96d56Sopenharmony_ci if u is None: 25707db96d56Sopenharmony_ci u = -1 25717db96d56Sopenharmony_ci try: 25727db96d56Sopenharmony_ci if tarinfo.issym() and hasattr(os, "lchown"): 25737db96d56Sopenharmony_ci os.lchown(targetpath, u, g) 25747db96d56Sopenharmony_ci else: 25757db96d56Sopenharmony_ci os.chown(targetpath, u, g) 25767db96d56Sopenharmony_ci except OSError as e: 25777db96d56Sopenharmony_ci raise ExtractError("could not change owner") from e 25787db96d56Sopenharmony_ci 25797db96d56Sopenharmony_ci def chmod(self, tarinfo, targetpath): 25807db96d56Sopenharmony_ci """Set file permissions of targetpath according to tarinfo. 25817db96d56Sopenharmony_ci """ 25827db96d56Sopenharmony_ci if tarinfo.mode is None: 25837db96d56Sopenharmony_ci return 25847db96d56Sopenharmony_ci try: 25857db96d56Sopenharmony_ci os.chmod(targetpath, tarinfo.mode) 25867db96d56Sopenharmony_ci except OSError as e: 25877db96d56Sopenharmony_ci raise ExtractError("could not change mode") from e 25887db96d56Sopenharmony_ci 25897db96d56Sopenharmony_ci def utime(self, tarinfo, targetpath): 25907db96d56Sopenharmony_ci """Set modification time of targetpath according to tarinfo. 
25917db96d56Sopenharmony_ci """ 25927db96d56Sopenharmony_ci mtime = tarinfo.mtime 25937db96d56Sopenharmony_ci if mtime is None: 25947db96d56Sopenharmony_ci return 25957db96d56Sopenharmony_ci if not hasattr(os, 'utime'): 25967db96d56Sopenharmony_ci return 25977db96d56Sopenharmony_ci try: 25987db96d56Sopenharmony_ci os.utime(targetpath, (mtime, mtime)) 25997db96d56Sopenharmony_ci except OSError as e: 26007db96d56Sopenharmony_ci raise ExtractError("could not change modification time") from e 26017db96d56Sopenharmony_ci 26027db96d56Sopenharmony_ci #-------------------------------------------------------------------------- 26037db96d56Sopenharmony_ci def next(self): 26047db96d56Sopenharmony_ci """Return the next member of the archive as a TarInfo object, when 26057db96d56Sopenharmony_ci TarFile is opened for reading. Return None if there is no more 26067db96d56Sopenharmony_ci available. 26077db96d56Sopenharmony_ci """ 26087db96d56Sopenharmony_ci self._check("ra") 26097db96d56Sopenharmony_ci if self.firstmember is not None: 26107db96d56Sopenharmony_ci m = self.firstmember 26117db96d56Sopenharmony_ci self.firstmember = None 26127db96d56Sopenharmony_ci return m 26137db96d56Sopenharmony_ci 26147db96d56Sopenharmony_ci # Advance the file pointer. 26157db96d56Sopenharmony_ci if self.offset != self.fileobj.tell(): 26167db96d56Sopenharmony_ci if self.offset == 0: 26177db96d56Sopenharmony_ci return None 26187db96d56Sopenharmony_ci self.fileobj.seek(self.offset - 1) 26197db96d56Sopenharmony_ci if not self.fileobj.read(1): 26207db96d56Sopenharmony_ci raise ReadError("unexpected end of data") 26217db96d56Sopenharmony_ci 26227db96d56Sopenharmony_ci # Read the next block. 
26237db96d56Sopenharmony_ci tarinfo = None 26247db96d56Sopenharmony_ci while True: 26257db96d56Sopenharmony_ci try: 26267db96d56Sopenharmony_ci tarinfo = self.tarinfo.fromtarfile(self) 26277db96d56Sopenharmony_ci except EOFHeaderError as e: 26287db96d56Sopenharmony_ci if self.ignore_zeros: 26297db96d56Sopenharmony_ci self._dbg(2, "0x%X: %s" % (self.offset, e)) 26307db96d56Sopenharmony_ci self.offset += BLOCKSIZE 26317db96d56Sopenharmony_ci continue 26327db96d56Sopenharmony_ci except InvalidHeaderError as e: 26337db96d56Sopenharmony_ci if self.ignore_zeros: 26347db96d56Sopenharmony_ci self._dbg(2, "0x%X: %s" % (self.offset, e)) 26357db96d56Sopenharmony_ci self.offset += BLOCKSIZE 26367db96d56Sopenharmony_ci continue 26377db96d56Sopenharmony_ci elif self.offset == 0: 26387db96d56Sopenharmony_ci raise ReadError(str(e)) from None 26397db96d56Sopenharmony_ci except EmptyHeaderError: 26407db96d56Sopenharmony_ci if self.offset == 0: 26417db96d56Sopenharmony_ci raise ReadError("empty file") from None 26427db96d56Sopenharmony_ci except TruncatedHeaderError as e: 26437db96d56Sopenharmony_ci if self.offset == 0: 26447db96d56Sopenharmony_ci raise ReadError(str(e)) from None 26457db96d56Sopenharmony_ci except SubsequentHeaderError as e: 26467db96d56Sopenharmony_ci raise ReadError(str(e)) from None 26477db96d56Sopenharmony_ci except Exception as e: 26487db96d56Sopenharmony_ci try: 26497db96d56Sopenharmony_ci import zlib 26507db96d56Sopenharmony_ci if isinstance(e, zlib.error): 26517db96d56Sopenharmony_ci raise ReadError(f'zlib error: {e}') from None 26527db96d56Sopenharmony_ci else: 26537db96d56Sopenharmony_ci raise e 26547db96d56Sopenharmony_ci except ImportError: 26557db96d56Sopenharmony_ci raise e 26567db96d56Sopenharmony_ci break 26577db96d56Sopenharmony_ci 26587db96d56Sopenharmony_ci if tarinfo is not None: 26597db96d56Sopenharmony_ci self.members.append(tarinfo) 26607db96d56Sopenharmony_ci else: 26617db96d56Sopenharmony_ci self._loaded = True 26627db96d56Sopenharmony_ci 
26637db96d56Sopenharmony_ci return tarinfo 26647db96d56Sopenharmony_ci 26657db96d56Sopenharmony_ci #-------------------------------------------------------------------------- 26667db96d56Sopenharmony_ci # Little helper methods: 26677db96d56Sopenharmony_ci 26687db96d56Sopenharmony_ci def _getmember(self, name, tarinfo=None, normalize=False): 26697db96d56Sopenharmony_ci """Find an archive member by name from bottom to top. 26707db96d56Sopenharmony_ci If tarinfo is given, it is used as the starting point. 26717db96d56Sopenharmony_ci """ 26727db96d56Sopenharmony_ci # Ensure that all members have been loaded. 26737db96d56Sopenharmony_ci members = self.getmembers() 26747db96d56Sopenharmony_ci 26757db96d56Sopenharmony_ci # Limit the member search list up to tarinfo. 26767db96d56Sopenharmony_ci skipping = False 26777db96d56Sopenharmony_ci if tarinfo is not None: 26787db96d56Sopenharmony_ci try: 26797db96d56Sopenharmony_ci index = members.index(tarinfo) 26807db96d56Sopenharmony_ci except ValueError: 26817db96d56Sopenharmony_ci # The given starting point might be a (modified) copy. 26827db96d56Sopenharmony_ci # We'll later skip members until we find an equivalent. 
26837db96d56Sopenharmony_ci skipping = True 26847db96d56Sopenharmony_ci else: 26857db96d56Sopenharmony_ci # Happy fast path 26867db96d56Sopenharmony_ci members = members[:index] 26877db96d56Sopenharmony_ci 26887db96d56Sopenharmony_ci if normalize: 26897db96d56Sopenharmony_ci name = os.path.normpath(name) 26907db96d56Sopenharmony_ci 26917db96d56Sopenharmony_ci for member in reversed(members): 26927db96d56Sopenharmony_ci if skipping: 26937db96d56Sopenharmony_ci if tarinfo.offset == member.offset: 26947db96d56Sopenharmony_ci skipping = False 26957db96d56Sopenharmony_ci continue 26967db96d56Sopenharmony_ci if normalize: 26977db96d56Sopenharmony_ci member_name = os.path.normpath(member.name) 26987db96d56Sopenharmony_ci else: 26997db96d56Sopenharmony_ci member_name = member.name 27007db96d56Sopenharmony_ci 27017db96d56Sopenharmony_ci if name == member_name: 27027db96d56Sopenharmony_ci return member 27037db96d56Sopenharmony_ci 27047db96d56Sopenharmony_ci if skipping: 27057db96d56Sopenharmony_ci # Starting point was not found 27067db96d56Sopenharmony_ci raise ValueError(tarinfo) 27077db96d56Sopenharmony_ci 27087db96d56Sopenharmony_ci def _load(self): 27097db96d56Sopenharmony_ci """Read through the entire archive file and look for readable 27107db96d56Sopenharmony_ci members. 27117db96d56Sopenharmony_ci """ 27127db96d56Sopenharmony_ci while True: 27137db96d56Sopenharmony_ci tarinfo = self.next() 27147db96d56Sopenharmony_ci if tarinfo is None: 27157db96d56Sopenharmony_ci break 27167db96d56Sopenharmony_ci self._loaded = True 27177db96d56Sopenharmony_ci 27187db96d56Sopenharmony_ci def _check(self, mode=None): 27197db96d56Sopenharmony_ci """Check if TarFile is still open, and if the operation's mode 27207db96d56Sopenharmony_ci corresponds to TarFile's mode. 
27217db96d56Sopenharmony_ci """ 27227db96d56Sopenharmony_ci if self.closed: 27237db96d56Sopenharmony_ci raise OSError("%s is closed" % self.__class__.__name__) 27247db96d56Sopenharmony_ci if mode is not None and self.mode not in mode: 27257db96d56Sopenharmony_ci raise OSError("bad operation for mode %r" % self.mode) 27267db96d56Sopenharmony_ci 27277db96d56Sopenharmony_ci def _find_link_target(self, tarinfo): 27287db96d56Sopenharmony_ci """Find the target member of a symlink or hardlink member in the 27297db96d56Sopenharmony_ci archive. 27307db96d56Sopenharmony_ci """ 27317db96d56Sopenharmony_ci if tarinfo.issym(): 27327db96d56Sopenharmony_ci # Always search the entire archive. 27337db96d56Sopenharmony_ci linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname))) 27347db96d56Sopenharmony_ci limit = None 27357db96d56Sopenharmony_ci else: 27367db96d56Sopenharmony_ci # Search the archive before the link, because a hard link is 27377db96d56Sopenharmony_ci # just a reference to an already archived file. 27387db96d56Sopenharmony_ci linkname = tarinfo.linkname 27397db96d56Sopenharmony_ci limit = tarinfo 27407db96d56Sopenharmony_ci 27417db96d56Sopenharmony_ci member = self._getmember(linkname, tarinfo=limit, normalize=True) 27427db96d56Sopenharmony_ci if member is None: 27437db96d56Sopenharmony_ci raise KeyError("linkname %r not found" % linkname) 27447db96d56Sopenharmony_ci return member 27457db96d56Sopenharmony_ci 27467db96d56Sopenharmony_ci def __iter__(self): 27477db96d56Sopenharmony_ci """Provide an iterator object. 27487db96d56Sopenharmony_ci """ 27497db96d56Sopenharmony_ci if self._loaded: 27507db96d56Sopenharmony_ci yield from self.members 27517db96d56Sopenharmony_ci return 27527db96d56Sopenharmony_ci 27537db96d56Sopenharmony_ci # Yield items using TarFile's next() method. 27547db96d56Sopenharmony_ci # When all members have been read, set TarFile as _loaded. 
27557db96d56Sopenharmony_ci index = 0 27567db96d56Sopenharmony_ci # Fix for SF #1100429: Under rare circumstances it can 27577db96d56Sopenharmony_ci # happen that getmembers() is called during iteration, 27587db96d56Sopenharmony_ci # which will have already exhausted the next() method. 27597db96d56Sopenharmony_ci if self.firstmember is not None: 27607db96d56Sopenharmony_ci tarinfo = self.next() 27617db96d56Sopenharmony_ci index += 1 27627db96d56Sopenharmony_ci yield tarinfo 27637db96d56Sopenharmony_ci 27647db96d56Sopenharmony_ci while True: 27657db96d56Sopenharmony_ci if index < len(self.members): 27667db96d56Sopenharmony_ci tarinfo = self.members[index] 27677db96d56Sopenharmony_ci elif not self._loaded: 27687db96d56Sopenharmony_ci tarinfo = self.next() 27697db96d56Sopenharmony_ci if not tarinfo: 27707db96d56Sopenharmony_ci self._loaded = True 27717db96d56Sopenharmony_ci return 27727db96d56Sopenharmony_ci else: 27737db96d56Sopenharmony_ci return 27747db96d56Sopenharmony_ci index += 1 27757db96d56Sopenharmony_ci yield tarinfo 27767db96d56Sopenharmony_ci 27777db96d56Sopenharmony_ci def _dbg(self, level, msg): 27787db96d56Sopenharmony_ci """Write debugging output to sys.stderr. 27797db96d56Sopenharmony_ci """ 27807db96d56Sopenharmony_ci if level <= self.debug: 27817db96d56Sopenharmony_ci print(msg, file=sys.stderr) 27827db96d56Sopenharmony_ci 27837db96d56Sopenharmony_ci def __enter__(self): 27847db96d56Sopenharmony_ci self._check() 27857db96d56Sopenharmony_ci return self 27867db96d56Sopenharmony_ci 27877db96d56Sopenharmony_ci def __exit__(self, type, value, traceback): 27887db96d56Sopenharmony_ci if type is None: 27897db96d56Sopenharmony_ci self.close() 27907db96d56Sopenharmony_ci else: 27917db96d56Sopenharmony_ci # An exception occurred. We must not call close() because 27927db96d56Sopenharmony_ci # it would try to write end-of-archive blocks and padding. 
27937db96d56Sopenharmony_ci if not self._extfileobj: 27947db96d56Sopenharmony_ci self.fileobj.close() 27957db96d56Sopenharmony_ci self.closed = True 27967db96d56Sopenharmony_ci 27977db96d56Sopenharmony_ci#-------------------- 27987db96d56Sopenharmony_ci# exported functions 27997db96d56Sopenharmony_ci#-------------------- 28007db96d56Sopenharmony_ci 28017db96d56Sopenharmony_cidef is_tarfile(name): 28027db96d56Sopenharmony_ci """Return True if name points to a tar archive that we 28037db96d56Sopenharmony_ci are able to handle, else return False. 28047db96d56Sopenharmony_ci 28057db96d56Sopenharmony_ci 'name' should be a string, file, or file-like object. 28067db96d56Sopenharmony_ci """ 28077db96d56Sopenharmony_ci try: 28087db96d56Sopenharmony_ci if hasattr(name, "read"): 28097db96d56Sopenharmony_ci pos = name.tell() 28107db96d56Sopenharmony_ci t = open(fileobj=name) 28117db96d56Sopenharmony_ci name.seek(pos) 28127db96d56Sopenharmony_ci else: 28137db96d56Sopenharmony_ci t = open(name) 28147db96d56Sopenharmony_ci t.close() 28157db96d56Sopenharmony_ci return True 28167db96d56Sopenharmony_ci except TarError: 28177db96d56Sopenharmony_ci return False 28187db96d56Sopenharmony_ci 28197db96d56Sopenharmony_ciopen = TarFile.open 28207db96d56Sopenharmony_ci 28217db96d56Sopenharmony_ci 28227db96d56Sopenharmony_cidef main(): 28237db96d56Sopenharmony_ci import argparse 28247db96d56Sopenharmony_ci 28257db96d56Sopenharmony_ci description = 'A simple command-line interface for tarfile module.' 
28267db96d56Sopenharmony_ci parser = argparse.ArgumentParser(description=description) 28277db96d56Sopenharmony_ci parser.add_argument('-v', '--verbose', action='store_true', default=False, 28287db96d56Sopenharmony_ci help='Verbose output') 28297db96d56Sopenharmony_ci parser.add_argument('--filter', metavar='<filtername>', 28307db96d56Sopenharmony_ci choices=_NAMED_FILTERS, 28317db96d56Sopenharmony_ci help='Filter for extraction') 28327db96d56Sopenharmony_ci 28337db96d56Sopenharmony_ci group = parser.add_mutually_exclusive_group(required=True) 28347db96d56Sopenharmony_ci group.add_argument('-l', '--list', metavar='<tarfile>', 28357db96d56Sopenharmony_ci help='Show listing of a tarfile') 28367db96d56Sopenharmony_ci group.add_argument('-e', '--extract', nargs='+', 28377db96d56Sopenharmony_ci metavar=('<tarfile>', '<output_dir>'), 28387db96d56Sopenharmony_ci help='Extract tarfile into target dir') 28397db96d56Sopenharmony_ci group.add_argument('-c', '--create', nargs='+', 28407db96d56Sopenharmony_ci metavar=('<name>', '<file>'), 28417db96d56Sopenharmony_ci help='Create tarfile from sources') 28427db96d56Sopenharmony_ci group.add_argument('-t', '--test', metavar='<tarfile>', 28437db96d56Sopenharmony_ci help='Test if a tarfile is valid') 28447db96d56Sopenharmony_ci 28457db96d56Sopenharmony_ci args = parser.parse_args() 28467db96d56Sopenharmony_ci 28477db96d56Sopenharmony_ci if args.filter and args.extract is None: 28487db96d56Sopenharmony_ci parser.exit(1, '--filter is only valid for extraction\n') 28497db96d56Sopenharmony_ci 28507db96d56Sopenharmony_ci if args.test is not None: 28517db96d56Sopenharmony_ci src = args.test 28527db96d56Sopenharmony_ci if is_tarfile(src): 28537db96d56Sopenharmony_ci with open(src, 'r') as tar: 28547db96d56Sopenharmony_ci tar.getmembers() 28557db96d56Sopenharmony_ci print(tar.getmembers(), file=sys.stderr) 28567db96d56Sopenharmony_ci if args.verbose: 28577db96d56Sopenharmony_ci print('{!r} is a tar archive.'.format(src)) 
28587db96d56Sopenharmony_ci else: 28597db96d56Sopenharmony_ci parser.exit(1, '{!r} is not a tar archive.\n'.format(src)) 28607db96d56Sopenharmony_ci 28617db96d56Sopenharmony_ci elif args.list is not None: 28627db96d56Sopenharmony_ci src = args.list 28637db96d56Sopenharmony_ci if is_tarfile(src): 28647db96d56Sopenharmony_ci with TarFile.open(src, 'r:*') as tf: 28657db96d56Sopenharmony_ci tf.list(verbose=args.verbose) 28667db96d56Sopenharmony_ci else: 28677db96d56Sopenharmony_ci parser.exit(1, '{!r} is not a tar archive.\n'.format(src)) 28687db96d56Sopenharmony_ci 28697db96d56Sopenharmony_ci elif args.extract is not None: 28707db96d56Sopenharmony_ci if len(args.extract) == 1: 28717db96d56Sopenharmony_ci src = args.extract[0] 28727db96d56Sopenharmony_ci curdir = os.curdir 28737db96d56Sopenharmony_ci elif len(args.extract) == 2: 28747db96d56Sopenharmony_ci src, curdir = args.extract 28757db96d56Sopenharmony_ci else: 28767db96d56Sopenharmony_ci parser.exit(1, parser.format_help()) 28777db96d56Sopenharmony_ci 28787db96d56Sopenharmony_ci if is_tarfile(src): 28797db96d56Sopenharmony_ci with TarFile.open(src, 'r:*') as tf: 28807db96d56Sopenharmony_ci tf.extractall(path=curdir, filter=args.filter) 28817db96d56Sopenharmony_ci if args.verbose: 28827db96d56Sopenharmony_ci if curdir == '.': 28837db96d56Sopenharmony_ci msg = '{!r} file is extracted.'.format(src) 28847db96d56Sopenharmony_ci else: 28857db96d56Sopenharmony_ci msg = ('{!r} file is extracted ' 28867db96d56Sopenharmony_ci 'into {!r} directory.').format(src, curdir) 28877db96d56Sopenharmony_ci print(msg) 28887db96d56Sopenharmony_ci else: 28897db96d56Sopenharmony_ci parser.exit(1, '{!r} is not a tar archive.\n'.format(src)) 28907db96d56Sopenharmony_ci 28917db96d56Sopenharmony_ci elif args.create is not None: 28927db96d56Sopenharmony_ci tar_name = args.create.pop(0) 28937db96d56Sopenharmony_ci _, ext = os.path.splitext(tar_name) 28947db96d56Sopenharmony_ci compressions = { 28957db96d56Sopenharmony_ci # gz 
28967db96d56Sopenharmony_ci '.gz': 'gz', 28977db96d56Sopenharmony_ci '.tgz': 'gz', 28987db96d56Sopenharmony_ci # xz 28997db96d56Sopenharmony_ci '.xz': 'xz', 29007db96d56Sopenharmony_ci '.txz': 'xz', 29017db96d56Sopenharmony_ci # bz2 29027db96d56Sopenharmony_ci '.bz2': 'bz2', 29037db96d56Sopenharmony_ci '.tbz': 'bz2', 29047db96d56Sopenharmony_ci '.tbz2': 'bz2', 29057db96d56Sopenharmony_ci '.tb2': 'bz2', 29067db96d56Sopenharmony_ci } 29077db96d56Sopenharmony_ci tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w' 29087db96d56Sopenharmony_ci tar_files = args.create 29097db96d56Sopenharmony_ci 29107db96d56Sopenharmony_ci with TarFile.open(tar_name, tar_mode) as tf: 29117db96d56Sopenharmony_ci for file_name in tar_files: 29127db96d56Sopenharmony_ci tf.add(file_name) 29137db96d56Sopenharmony_ci 29147db96d56Sopenharmony_ci if args.verbose: 29157db96d56Sopenharmony_ci print('{!r} file created.'.format(tar_name)) 29167db96d56Sopenharmony_ci 29177db96d56Sopenharmony_ciif __name__ == '__main__': 29187db96d56Sopenharmony_ci main() 2919