#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import difflib
import hashlib
import itertools
import json
import os
import zipfile

from .pycache import pycache_enabled
from .pycache import pycache

# When set and a difference is detected, a diff of what changed is printed.
PRINT_EXPLANATIONS = int(os.environ.get('PRINT_BUILD_EXPLANATIONS', 0))

# An escape hatch that causes all targets to be rebuilt.
_FORCE_REBUILD = int(os.environ.get('FORCE_REBUILD', 0))


def get_new_metadata(input_strings, input_paths):
    """Builds a _Metadata snapshot describing the current inputs.

    Args:
      input_strings: Iterable of values recorded verbatim (via str()).
      input_paths: Iterable of input paths. Paths recognised by
          _is_zip_file() are recorded per zip entry; every other path is
          recorded with the md5 of its contents.

    Returns:
      A freshly populated _Metadata instance.
    """
    new_metadata = _Metadata()
    new_metadata.add_strings(input_strings)

    for path in input_paths:
        if _is_zip_file(path):
            entries = _extract_zip_entries(path)
            new_metadata.add_zip_file(path, entries)
        else:
            new_metadata.add_file(path, _md5_for_path(path))
    return new_metadata


def get_old_metadata(record_path):
    """Loads the metadata previously written to |record_path|.

    Returns:
      The parsed _Metadata, or None when the record does not exist or
      cannot be parsed.
    """
    old_metadata = None
    if os.path.exists(record_path):
        with open(record_path, 'r') as jsonfile:
            try:
                old_metadata = _Metadata.from_file(jsonfile)
            except Exception:  # pylint: disable=broad-except
                # Best effort: an unreadable record is treated the same as
                # a missing one, which simply marks the target as stale.
                pass
    return old_metadata


def print_explanations(record_path, changes):
    """Prints why a target is stale when PRINT_EXPLANATIONS is set."""
    if PRINT_EXPLANATIONS:
        print('=' * 80)
        print('Target is stale: %s' % record_path)
        print(changes.describe_difference())
        print('=' * 80)


def call_and_record_if_stale(
        function,  # pylint: disable=invalid-name
        record_path=None,
        input_paths=None,
        input_strings=None,
        output_paths=None,
        force=False,
        pass_changes=False):
    """Calls function if outputs are stale.

    Outputs are considered stale if:
    - any output_paths are missing, or
    - the contents of any file within input_paths has changed, or
    - the contents of input_strings has changed.

    To debug which files are out-of-date, set the environment variable:
        PRINT_BUILD_EXPLANATIONS=1

    When pycache is enabled, the record path is derived from a manifest made
    of the input digests and the sorted output paths, and up-to-date outputs
    are first looked up in the cache; |function| only runs if that lookup
    fails.

    Args:
      function: The function to call.
      record_path: Path to record metadata.
        Defaults to output_paths[0] + '.md5.stamp'
      input_paths: List of paths to calculate a md5 sum on.
      input_strings: List of strings to record verbatim.
      output_paths: List of output paths.
      force: Whether to treat outputs as missing regardless of whether they
        actually are.
      pass_changes: Whether to pass a Changes instance to |function|.
    """
    assert record_path or output_paths
    input_paths = input_paths or []
    input_strings = input_strings or []
    output_paths = output_paths or []

    new_metadata = get_new_metadata(input_strings, input_paths)
    force = force or _FORCE_REBUILD
    missing_outputs = [
        x for x in output_paths if force or not os.path.exists(x)
    ]

    if pycache_enabled:
        # Input strings, input files and outputs names together compose
        # cache manifest, which is the only identifier of a python action.
        manifest = '-'.join(
            [new_metadata.strings_md5(),
             new_metadata.files_md5()] + sorted(output_paths))
        record_path = pycache.get_manifest_path('{}.manifest'.format(manifest))
        old_metadata = get_old_metadata(record_path)
    else:
        record_path = record_path or output_paths[0] + '.md5.stamp'
        # When outputs are missing, don't bother gathering change information.
        if not missing_outputs:
            old_metadata = get_old_metadata(record_path)
        else:
            old_metadata = None

    changes = Changes(old_metadata, new_metadata, force, missing_outputs)
    if not changes.has_changes():
        if not pycache_enabled:
            return
        # Nothing changed: try to restore the outputs from the cache. If
        # that fails, fall through and run |function| as for a stale target.
        if pycache_enabled and pycache.retrieve(output_paths, prefix=manifest):
            return

    print_explanations(record_path, changes)

    args = (changes, ) if pass_changes else ()
    function(*args)
    if pycache_enabled:
        try:
            pycache.report_cache_stat('cache_miss')
        except Exception:  # pylint: disable=broad-except
            # Statistics reporting is best effort and must not fail the
            # action.
            pass
        pycache.save(output_paths, prefix=manifest)

    with open(record_path, 'w') as record:
        new_metadata.to_file(record)


class Changes(object):
    """Provides an API for querying what changed between runs."""
    def __init__(self, old_metadata, new_metadata, force, missing_outputs):
        # old_metadata may be None when no previous record is available.
        self.old_metadata = old_metadata
        self.new_metadata = new_metadata
        self.force = force
        self.missing_outputs = missing_outputs

    def has_changes(self):
        """Returns whether any changes exist."""
        return (
            self.force or not self.old_metadata or
            self.old_metadata.strings_md5() != self.new_metadata.strings_md5()
            or self.old_metadata.files_md5() != self.new_metadata.files_md5())

    def added_or_modified_only(self):
        """Returns whether the only changes were from added or modified (sub)files.

        No missing outputs, no removed paths/subpaths.
        """
        if (self.force or not self.old_metadata
                or self.old_metadata.strings_md5() !=
                self.new_metadata.strings_md5()):
            return False
        if any(self.iter_removed_paths()):
            return False
        for path in self.iter_modified_paths():
            if any(self.iter_removed_subpaths(path)):
                return False
        return True

    def iter_all_paths(self):
        """Generator for paths."""
        return self.new_metadata.iter_paths()

    def iter_all_subpaths(self, path):
        """Generator for subpaths."""
        return self.new_metadata.iter_subpaths(path)

    def iter_added_paths(self):
        """Generator for paths that were added."""
        for path in self.new_metadata.iter_paths():
            if self._get_old_tag(path) is None:
                yield path

    def iter_added_subpaths(self, path):
        """Generator for paths that were added within the given zip file."""
        for subpath in self.new_metadata.iter_subpaths(path):
            if self._get_old_tag(path, subpath) is None:
                yield subpath

    def iter_removed_paths(self):
        """Generator for paths that were removed."""
        if self.old_metadata:
            for path in self.old_metadata.iter_paths():
                if self.new_metadata.get_tag(path) is None:
                    yield path

    def iter_removed_subpaths(self, path):
        """Generator for paths that were removed within the given zip file."""
        if self.old_metadata:
            for subpath in self.old_metadata.iter_subpaths(path):
                if self.new_metadata.get_tag(path, subpath) is None:
                    yield subpath

    def iter_modified_paths(self):
        """Generator for paths whose contents have changed."""
        for path in self.new_metadata.iter_paths():
            old_tag = self._get_old_tag(path)
            new_tag = self.new_metadata.get_tag(path)
            if old_tag is not None and old_tag != new_tag:
                yield path

    def iter_modified_subpaths(self, path):
        """Generator for paths within a zip file whose contents have changed."""
        for subpath in self.new_metadata.iter_subpaths(path):
            old_tag = self._get_old_tag(path, subpath)
            new_tag = self.new_metadata.get_tag(path, subpath)
            if old_tag is not None and old_tag != new_tag:
                yield subpath

    def iter_changed_paths(self):
        """Generator for all changed paths (added/removed/modified)."""
        return itertools.chain(self.iter_removed_paths(),
                               self.iter_modified_paths(),
                               self.iter_added_paths())

    def iter_changed_subpaths(self, path):
        """Generator for paths within a zip that were added/removed/modified."""
        return itertools.chain(self.iter_removed_subpaths(path),
                               self.iter_modified_subpaths(path),
                               self.iter_added_subpaths(path))

    def describe_difference(self):
        """Returns a human-readable description of what changed."""
        if self.force:
            return 'force=True'
        elif self.old_metadata is None:
            return 'Previous stamp file not found.'

        if self.old_metadata.strings_md5() != self.new_metadata.strings_md5():
            ndiff = difflib.ndiff(self.old_metadata.get_strings(),
                                  self.new_metadata.get_strings())
            changed = [s for s in ndiff if not s.startswith(' ')]
            return 'Input strings changed:\n ' + '\n '.join(changed)

        if self.old_metadata.files_md5() == self.new_metadata.files_md5():
            return "There's no difference."

        # One formatted line per path. Each path is formatted individually;
        # formatting the generator itself would produce its repr and then
        # extend the list one character at a time.
        lines = []
        lines.extend('Added: {}'.format(p) for p in self.iter_added_paths())
        lines.extend('Removed: {}'.format(p)
                     for p in self.iter_removed_paths())
        for path in self.iter_modified_paths():
            lines.append('Modified: {}'.format(path))
            lines.extend(' -> Subpath added: {}'.format(p)
                         for p in self.iter_added_subpaths(path))
            lines.extend(' -> Subpath removed: {}'.format(p)
                         for p in self.iter_removed_subpaths(path))
            lines.extend(' -> Subpath modified: {}'.format(p)
                         for p in self.iter_modified_subpaths(path))
        if lines:
            return 'Input files changed:\n {}'.format('\n '.join(lines))

        if self.missing_outputs:
            return 'Outputs do not exist:\n {}'.format('\n '.join(
                self.missing_outputs))

        return 'I have no idea what changed (there is a bug).'

    def _get_old_tag(self, path, subpath=None):
        # Yields None when there is no previous metadata at all.
        return self.old_metadata and self.old_metadata.get_tag(path, subpath)


class _Metadata(object):
    """Data model for tracking change metadata."""
    def __init__(self):
        self._files_md5 = None
        self._strings_md5 = None
        self._files = []
        self._strings = []
        # Map of (path, subpath) -> entry. Created upon first call to
        # _get_entry().
        self._file_map = None

    @classmethod
    def from_file(cls, fileobj):
        """Returns a _Metadata initialized from a file object."""
        ret = cls()
        obj = json.load(fileobj)
        ret._files_md5 = obj['files-md5']
        ret._strings_md5 = obj['strings-md5']
        ret._files = obj['input-files']
        ret._strings = obj['input-strings']
        return ret

    def to_file(self, fileobj):
        """Serializes metadata to the given file object."""
        obj = {
            "files-md5": self.files_md5(),
            "strings-md5": self.strings_md5(),
            "input-files": self._files,
            "input-strings": self._strings,
        }
        json.dump(obj, fileobj, indent=2, sort_keys=True)

    def add_strings(self, values):
        """Appends str() of each value to the recorded input strings."""
        self._assert_not_queried()
        self._strings.extend(str(v) for v in values)

    def add_file(self, path, tag):
        """Adds metadata for a non-zip file.

        Args:
          path: Path to the file.
          tag: A short string representative of the file contents.
        """
        self._assert_not_queried()
        self._files.append({
            'path': path,
            'tag': tag,
        })

    def add_zip_file(self, path, entries):
        """Adds metadata for a zip file.

        Args:
          path: Path to the file.
          entries: List of (subpath, tag) tuples for entries within the zip.
        """
        self._assert_not_queried()
        # The zip's own tag covers every entry name followed by every entry
        # tag.
        tag = _compute_inline_md5(
            itertools.chain((e[0] for e in entries), (e[1] for e in entries)))
        self._files.append({
            'path': path,
            'tag': tag,
            'entries': [{
                "path": e[0],
                "tag": e[1]
            } for e in entries],
        })

    def get_strings(self):
        """Returns the list of input strings."""
        return self._strings

    def files_md5(self):
        """Lazily computes and returns the aggregate md5 of input files."""
        if self._files_md5 is None:
            # Omit paths from md5 since temporary files have random names.
            self._files_md5 = _compute_inline_md5(
                self.get_tag(p) for p in sorted(self.iter_paths()))
        return self._files_md5

    def strings_md5(self):
        """Lazily computes and returns the aggregate md5 of input strings."""
        if self._strings_md5 is None:
            self._strings_md5 = _compute_inline_md5(self._strings)
        return self._strings_md5

    def get_tag(self, path, subpath=None):
        """Returns the tag for the given path / subpath."""
        ret = self._get_entry(path, subpath)
        return ret and ret['tag']

    def iter_paths(self):
        """Returns a generator for all top-level paths."""
        return (e['path'] for e in self._files)

    def iter_subpaths(self, path):
        """Returns a generator for all subpaths in the given zip.

        If the given path is not a zip file or doesn't exist, returns an empty
        iterable.
        """
        outer_entry = self._get_entry(path)
        if not outer_entry:
            return ()
        subentries = outer_entry.get('entries', [])
        return (entry['path'] for entry in subentries)

    def _assert_not_queried(self):
        # Mutation is only allowed before any digest or lookup map has been
        # computed, since those are cached and would go out of date.
        assert self._files_md5 is None
        assert self._strings_md5 is None
        assert self._file_map is None

    def _get_entry(self, path, subpath=None):
        """Returns the JSON entry for the given path / subpath."""
        if self._file_map is None:
            self._file_map = {}
            for entry in self._files:
                self._file_map[(entry['path'], None)] = entry
                for subentry in entry.get('entries', ()):
                    self._file_map[(entry['path'],
                                    subentry['path'])] = subentry
        return self._file_map.get((path, subpath))


def _update_md5_for_file(md5, path, block_size=2**16):
    """Feeds the contents of |path| into |md5| in block_size chunks.

    A dead symlink cannot be opened, so its link target string is hashed
    instead.
    """
    # os.path.exists() follows the link, so the target is resolved relative
    # to the link's own directory rather than the current working directory.
    if os.path.islink(path) and not os.path.exists(path):
        md5.update(os.readlink(path).encode())
        return

    with open(path, 'rb') as infile:
        while True:
            data = infile.read(block_size)
            if not data:
                break
            md5.update(data)


def _update_md5_for_directory(md5, dir_path):
    """Feeds every file found under |dir_path| into |md5|."""
    for root, dirs, files in os.walk(dir_path):
        # os.walk() yields entries in arbitrary order; sort both levels so
        # the digest does not depend on directory listing order.
        dirs.sort()
        for f in sorted(files):
            _update_md5_for_file(md5, os.path.join(root, f))


def _md5_for_path(path):
    """Returns the hex md5 of a file, or of all files under a directory."""
    md5 = hashlib.md5()
    if os.path.isdir(path):
        _update_md5_for_directory(md5, path)
    else:
        _update_md5_for_file(md5, path)
    return md5.hexdigest()


def _compute_inline_md5(iterable):
    """Computes the md5 of the concatenated parameters."""
    md5 = hashlib.md5()
    for item in iterable:
        md5.update(str(item).encode())
    return md5.hexdigest()


def _is_zip_file(path):
    """Returns whether to treat the given file as a zip file."""
    # A suffix test, not a substring test: ('.zip') is a plain string, so
    # `in` would also accept short paths such as '', '.' or 'zip'.
    return path.endswith('.zip')


def _extract_zip_entries(path):
    """Returns a list of (subpath, tag) for the files within zip |path|.

    The tag is the entry's CRC plus its compress_type, so re-compressing an
    entry with a different method changes its tag as well.
    """
    entries = []
    with zipfile.ZipFile(path) as zip_file:
        for zip_info in zip_file.infolist():
            # Skip directories and empty files.
            if zip_info.CRC:
                entries.append(
                    (zip_info.filename, zip_info.CRC + zip_info.compress_type))
    return entries