15f9996aaSopenharmony_ci#!/usr/bin/env python
25f9996aaSopenharmony_ci# -*- coding: utf-8 -*-
35f9996aaSopenharmony_ci# Copyright 2013 The Chromium Authors. All rights reserved.
45f9996aaSopenharmony_ci# Use of this source code is governed by a BSD-style license that can be
55f9996aaSopenharmony_ci# found in the LICENSE file.
65f9996aaSopenharmony_ci
75f9996aaSopenharmony_ciimport difflib
85f9996aaSopenharmony_ciimport hashlib
95f9996aaSopenharmony_ciimport itertools
105f9996aaSopenharmony_ciimport json
115f9996aaSopenharmony_ciimport os
125f9996aaSopenharmony_ciimport zipfile
135f9996aaSopenharmony_cifrom .pycache import pycache_enabled
145f9996aaSopenharmony_cifrom .pycache import pycache
155f9996aaSopenharmony_ci
# Debug switch: a non-zero PRINT_BUILD_EXPLANATIONS makes stale targets print
# a description of what changed.
PRINT_EXPLANATIONS = int(os.getenv('PRINT_BUILD_EXPLANATIONS', 0))

# Escape hatch: a non-zero FORCE_REBUILD causes every target to be rebuilt.
_FORCE_REBUILD = int(os.getenv('FORCE_REBUILD', 0))
215f9996aaSopenharmony_ci
225f9996aaSopenharmony_ci
def get_new_metadata(input_strings, input_paths):
    """Builds a _Metadata snapshot describing the current inputs.

    Args:
      input_strings: Values to record verbatim (stored via str()).
      input_paths: Paths to fingerprint. Paths treated as zip files are
        recorded entry by entry; every other path is recorded with the md5
        computed by _md5_for_path().

    Returns:
      The populated _Metadata instance.
    """
    metadata = _Metadata()
    metadata.add_strings(input_strings)

    for input_path in input_paths:
        if not _is_zip_file(input_path):
            metadata.add_file(input_path, _md5_for_path(input_path))
            continue
        metadata.add_zip_file(input_path, _extract_zip_entries(input_path))
    return metadata
345f9996aaSopenharmony_ci
355f9996aaSopenharmony_ci
def get_old_metadata(record_path):
    """Loads the metadata recorded by a previous run, if any.

    Reading the record is best-effort: a missing, unreadable or malformed
    record simply means "no previous metadata", which callers treat as stale.

    Args:
      record_path: Path of the JSON record written by a previous run.

    Returns:
      The _Metadata parsed from |record_path|, or None when it cannot be
      loaded.
    """
    try:
        # Open directly instead of checking os.path.exists() first, so a
        # record removed by a concurrent process cannot race us.
        with open(record_path, 'r') as jsonfile:
            return _Metadata.from_file(jsonfile)
    except Exception:  # pylint: disable=broad-except
        # Deliberately broad (but not bare, so Ctrl-C still propagates): any
        # failure to read or parse the record is equivalent to not having one.
        return None
455f9996aaSopenharmony_ci
465f9996aaSopenharmony_ci
def print_explanations(record_path, changes):
    """Prints why a target is stale, if PRINT_EXPLANATIONS is enabled.

    Args:
      record_path: Path of the target's record file, used to name the target.
      changes: Object whose describe_difference() explains what changed.
    """
    if not PRINT_EXPLANATIONS:
        return
    separator = '=' * 80
    print(separator)
    print('Target is stale: %s' % record_path)
    print(changes.describe_difference())
    print(separator)
545f9996aaSopenharmony_ci
def call_and_record_if_stale(
        function,  # pylint: disable=invalid-name
        record_path=None,
        input_paths=None,
        input_strings=None,
        output_paths=None,
        force=False,
        pass_changes=False):
    """Calls function if outputs are stale.

    Outputs are considered stale if:
    - any output_paths are missing, or
    - the contents of any file within input_paths has changed, or
    - the contents of input_strings has changed.

    When pycache is enabled, the record is stored at a manifest path derived
    from the inputs and output names, and up-to-date outputs are restored
    from the cache instead of being rebuilt; |function| is only called when
    the inputs changed or the cache could not provide the outputs.

    To debug which files are out-of-date, set the environment variable:
        PRINT_BUILD_EXPLANATIONS=1

    Args:
      function: The function to call.
      record_path: Path to record metadata.
        Defaults to output_paths[0] + '.md5.stamp'. Replaced by the pycache
        manifest path when pycache is enabled.
      input_paths: List of paths to calculate a md5 sum on.
      input_strings: List of strings to record verbatim.
      output_paths: List of output paths.
      force: Whether to treat outputs as missing regardless of whether they
        actually are.
      pass_changes: Whether to pass a Changes instance to |function|.
    """
    assert record_path or output_paths
    input_paths = input_paths or []
    input_strings = input_strings or []
    output_paths = output_paths or []

    new_metadata = get_new_metadata(input_strings, input_paths)
    force = force or _FORCE_REBUILD
    missing_outputs = [
        x for x in output_paths if force or not os.path.exists(x)
    ]

    # Only meaningful (and only used) when pycache is enabled.
    manifest = None
    if pycache_enabled:
        # Input strings, input files and outputs names together compose
        # cache manifest, which is the only identifier of a python action.
        manifest = '-'.join(
            [new_metadata.strings_md5(),
             new_metadata.files_md5()] + sorted(output_paths))
        record_path = pycache.get_manifest_path('{}.manifest'.format(manifest))
        old_metadata = get_old_metadata(record_path)
    else:
        record_path = record_path or output_paths[0] + '.md5.stamp'
        # When outputs are missing, don't bother gathering change information.
        if not missing_outputs:
            old_metadata = get_old_metadata(record_path)
        else:
            old_metadata = None

    changes = Changes(old_metadata, new_metadata, force, missing_outputs)
    if not changes.has_changes():
        if not pycache_enabled:
            return
        # Inputs are unchanged: try to restore the outputs from the cache.
        # If that fails, fall through and rebuild.
        if pycache.retrieve(output_paths, prefix=manifest):
            return

    print_explanations(record_path, changes)

    args = (changes, ) if pass_changes else ()
    function(*args)
    if pycache_enabled:
        try:
            pycache.report_cache_stat('cache_miss')
        except Exception:  # pylint: disable=broad-except
            # Statistics are best-effort and must never fail the build.
            pass
        pycache.save(output_paths, prefix=manifest)

    with open(record_path, 'w') as record:
        new_metadata.to_file(record)
1325f9996aaSopenharmony_ci
class Changes(object):
    """Provides an API for querying what changed between runs.

    Compares the metadata recorded by a previous run (|old_metadata|, which
    may be None) against the metadata of the current inputs (|new_metadata|).
    """
    def __init__(self, old_metadata, new_metadata, force, missing_outputs):
        self.old_metadata = old_metadata
        self.new_metadata = new_metadata
        self.force = force
        self.missing_outputs = missing_outputs

    def has_changes(self):
        """Returns whether any changes exist."""
        return (
            self.force or not self.old_metadata or
            self.old_metadata.strings_md5() != self.new_metadata.strings_md5()
            or self.old_metadata.files_md5() != self.new_metadata.files_md5())

    def added_or_modified_only(self):
        """Returns whether the only changes were from added or modified (sub)files.

        No missing outputs, no removed paths/subpaths.
        """
        # missing_outputs is checked explicitly: old metadata may be present
        # even though outputs are missing (e.g. when it was loaded from a
        # cache manifest), and an incremental update is not valid then.
        if (self.force or not self.old_metadata or self.missing_outputs
                or self.old_metadata.strings_md5() !=
                self.new_metadata.strings_md5()):
            return False
        if any(self.iter_removed_paths()):
            return False
        for path in self.iter_modified_paths():
            if any(self.iter_removed_subpaths(path)):
                return False
        return True

    def iter_all_paths(self):
        """Generator for paths."""
        return self.new_metadata.iter_paths()

    def iter_all_subpaths(self, path):
        """Generator for subpaths."""
        return self.new_metadata.iter_subpaths(path)

    def iter_added_paths(self):
        """Generator for paths that were added."""
        for path in self.new_metadata.iter_paths():
            if self._get_old_tag(path) is None:
                yield path

    def iter_added_subpaths(self, path):
        """Generator for paths that were added within the given zip file."""
        for subpath in self.new_metadata.iter_subpaths(path):
            if self._get_old_tag(path, subpath) is None:
                yield subpath

    def iter_removed_paths(self):
        """Generator for paths that were removed."""
        if self.old_metadata:
            for path in self.old_metadata.iter_paths():
                if self.new_metadata.get_tag(path) is None:
                    yield path

    def iter_removed_subpaths(self, path):
        """Generator for paths that were removed within the given zip file."""
        if self.old_metadata:
            for subpath in self.old_metadata.iter_subpaths(path):
                if self.new_metadata.get_tag(path, subpath) is None:
                    yield subpath

    def iter_modified_paths(self):
        """Generator for paths whose contents have changed."""
        for path in self.new_metadata.iter_paths():
            old_tag = self._get_old_tag(path)
            new_tag = self.new_metadata.get_tag(path)
            if old_tag is not None and old_tag != new_tag:
                yield path

    def iter_modified_subpaths(self, path):
        """Generator for paths within a zip file whose contents have changed."""
        for subpath in self.new_metadata.iter_subpaths(path):
            old_tag = self._get_old_tag(path, subpath)
            new_tag = self.new_metadata.get_tag(path, subpath)
            if old_tag is not None and old_tag != new_tag:
                yield subpath

    def iter_changed_paths(self):
        """Generator for all changed paths (added/removed/modified)."""
        return itertools.chain(self.iter_removed_paths(),
                               self.iter_modified_paths(),
                               self.iter_added_paths())

    def iter_changed_subpaths(self, path):
        """Generator for paths within a zip that were added/removed/modified."""
        return itertools.chain(self.iter_removed_subpaths(path),
                               self.iter_modified_subpaths(path),
                               self.iter_added_subpaths(path))

    def describe_difference(self):
        """Returns a human-readable description of what changed."""
        if self.force:
            return 'force=True'
        elif self.old_metadata is None:
            return 'Previous stamp file not found.'

        if self.old_metadata.strings_md5() != self.new_metadata.strings_md5():
            ndiff = difflib.ndiff(self.old_metadata.get_strings(),
                                  self.new_metadata.get_strings())
            changed = [s for s in ndiff if not s.startswith(' ')]
            return 'Input strings changed:\n  ' + '\n  '.join(changed)

        if self.old_metadata.files_md5() == self.new_metadata.files_md5():
            return "There's no difference."

        # One line per path: format each path individually. (Formatting the
        # generator itself would produce a single repr string, and extend()
        # would then add it one character at a time.)
        lines = []
        lines.extend('Added: {}'.format(p) for p in self.iter_added_paths())
        lines.extend('Removed: {}'.format(p)
                     for p in self.iter_removed_paths())
        for path in self.iter_modified_paths():
            lines.append('Modified: {}'.format(path))
            lines.extend('  -> Subpath added: {}'.format(p)
                         for p in self.iter_added_subpaths(path))
            lines.extend('  -> Subpath removed: {}'.format(p)
                         for p in self.iter_removed_subpaths(path))
            lines.extend('  -> Subpath modified: {}'.format(p)
                         for p in self.iter_modified_subpaths(path))
        if lines:
            return 'Input files changed:\n  {}'.format('\n  '.join(lines))

        if self.missing_outputs:
            return 'Outputs do not exist:\n  {}'.format('\n  '.join(
                self.missing_outputs))

        return 'I have no idea what changed (there is a bug).'

    def _get_old_tag(self, path, subpath=None):
        """Returns the previous tag for path / subpath, or None if unknown."""
        return self.old_metadata and self.old_metadata.get_tag(path, subpath)
2665f9996aaSopenharmony_ci
2675f9996aaSopenharmony_ciclass _Metadata(object):
2685f9996aaSopenharmony_ci    """Data model for tracking change metadata."""
2695f9996aaSopenharmony_ci    def __init__(self):
2705f9996aaSopenharmony_ci        self._files_md5 = None
2715f9996aaSopenharmony_ci        self._strings_md5 = None
2725f9996aaSopenharmony_ci        self._files = []
2735f9996aaSopenharmony_ci        self._strings = []
2745f9996aaSopenharmony_ci        # Map of (path, subpath) -> entry. Created upon first call to _get_entry().
2755f9996aaSopenharmony_ci        self._file_map = None
2765f9996aaSopenharmony_ci
2775f9996aaSopenharmony_ci    @classmethod
2785f9996aaSopenharmony_ci    def from_file(cls, fileobj):
2795f9996aaSopenharmony_ci        """Returns a _Metadata initialized from a file object."""
2805f9996aaSopenharmony_ci        ret = cls()
2815f9996aaSopenharmony_ci        obj = json.load(fileobj)
2825f9996aaSopenharmony_ci        ret._files_md5 = obj['files-md5']
2835f9996aaSopenharmony_ci        ret._strings_md5 = obj['strings-md5']
2845f9996aaSopenharmony_ci        ret._files = obj['input-files']
2855f9996aaSopenharmony_ci        ret._strings = obj['input-strings']
2865f9996aaSopenharmony_ci        return ret
2875f9996aaSopenharmony_ci
2885f9996aaSopenharmony_ci    def to_file(self, fileobj):
2895f9996aaSopenharmony_ci        """Serializes metadata to the given file object."""
2905f9996aaSopenharmony_ci        obj = {
2915f9996aaSopenharmony_ci            "files-md5": self.files_md5(),
2925f9996aaSopenharmony_ci            "strings-md5": self.strings_md5(),
2935f9996aaSopenharmony_ci            "input-files": self._files,
2945f9996aaSopenharmony_ci            "input-strings": self._strings,
2955f9996aaSopenharmony_ci        }
2965f9996aaSopenharmony_ci        json.dump(obj, fileobj, indent=2, sort_keys=True)
2975f9996aaSopenharmony_ci
2985f9996aaSopenharmony_ci    def add_strings(self, values):
2995f9996aaSopenharmony_ci        self._assert_not_queried()
3005f9996aaSopenharmony_ci        self._strings.extend(str(v) for v in values)
3015f9996aaSopenharmony_ci
3025f9996aaSopenharmony_ci    def add_file(self, path, tag):
3035f9996aaSopenharmony_ci        """Adds metadata for a non-zip file.
3045f9996aaSopenharmony_ci
3055f9996aaSopenharmony_ci        Args:
3065f9996aaSopenharmony_ci          path: Path to the file.
3075f9996aaSopenharmony_ci          tag: A short string representative of the file contents.
3085f9996aaSopenharmony_ci        """
3095f9996aaSopenharmony_ci        self._assert_not_queried()
3105f9996aaSopenharmony_ci        self._files.append({
3115f9996aaSopenharmony_ci            'path': path,
3125f9996aaSopenharmony_ci            'tag': tag,
3135f9996aaSopenharmony_ci        })
3145f9996aaSopenharmony_ci
3155f9996aaSopenharmony_ci    def add_zip_file(self, path, entries):
3165f9996aaSopenharmony_ci        """Adds metadata for a zip file.
3175f9996aaSopenharmony_ci
3185f9996aaSopenharmony_ci        Args:
3195f9996aaSopenharmony_ci          path: Path to the file.
3205f9996aaSopenharmony_ci          entries: List of (subpath, tag) tuples for entries within the zip.
3215f9996aaSopenharmony_ci        """
3225f9996aaSopenharmony_ci        self._assert_not_queried()
3235f9996aaSopenharmony_ci        tag = _compute_inline_md5(
3245f9996aaSopenharmony_ci            itertools.chain((e[0] for e in entries), (e[1] for e in entries)))
3255f9996aaSopenharmony_ci        self._files.append({
3265f9996aaSopenharmony_ci            'path':
3275f9996aaSopenharmony_ci            path,
3285f9996aaSopenharmony_ci            'tag':
3295f9996aaSopenharmony_ci            tag,
3305f9996aaSopenharmony_ci            'entries': [{
3315f9996aaSopenharmony_ci                "path": e[0],
3325f9996aaSopenharmony_ci                "tag": e[1]
3335f9996aaSopenharmony_ci            } for e in entries],
3345f9996aaSopenharmony_ci        })
3355f9996aaSopenharmony_ci
3365f9996aaSopenharmony_ci    def get_strings(self):
3375f9996aaSopenharmony_ci        """Returns the list of input strings."""
3385f9996aaSopenharmony_ci        return self._strings
3395f9996aaSopenharmony_ci
3405f9996aaSopenharmony_ci    def files_md5(self):
3415f9996aaSopenharmony_ci        """Lazily computes and returns the aggregate md5 of input files."""
3425f9996aaSopenharmony_ci        if self._files_md5 is None:
3435f9996aaSopenharmony_ci            # Omit paths from md5 since temporary files have random names.
3445f9996aaSopenharmony_ci            self._files_md5 = _compute_inline_md5(
3455f9996aaSopenharmony_ci                self.get_tag(p) for p in sorted(self.iter_paths()))
3465f9996aaSopenharmony_ci        return self._files_md5
3475f9996aaSopenharmony_ci
3485f9996aaSopenharmony_ci    def strings_md5(self):
3495f9996aaSopenharmony_ci        """Lazily computes and returns the aggregate md5 of input strings."""
3505f9996aaSopenharmony_ci        if self._strings_md5 is None:
3515f9996aaSopenharmony_ci            self._strings_md5 = _compute_inline_md5(self._strings)
3525f9996aaSopenharmony_ci        return self._strings_md5
3535f9996aaSopenharmony_ci
3545f9996aaSopenharmony_ci    def get_tag(self, path, subpath=None):
3555f9996aaSopenharmony_ci        """Returns the tag for the given path / subpath."""
3565f9996aaSopenharmony_ci        ret = self._get_entry(path, subpath)
3575f9996aaSopenharmony_ci        return ret and ret['tag']
3585f9996aaSopenharmony_ci
3595f9996aaSopenharmony_ci    def iter_paths(self):
3605f9996aaSopenharmony_ci        """Returns a generator for all top-level paths."""
3615f9996aaSopenharmony_ci        return (e['path'] for e in self._files)
3625f9996aaSopenharmony_ci
3635f9996aaSopenharmony_ci    def iter_subpaths(self, path):
3645f9996aaSopenharmony_ci        """Returns a generator for all subpaths in the given zip.
3655f9996aaSopenharmony_ci
3665f9996aaSopenharmony_ci        If the given path is not a zip file or doesn't exist, returns an empty
3675f9996aaSopenharmony_ci        iterable.
3685f9996aaSopenharmony_ci        """
3695f9996aaSopenharmony_ci        outer_entry = self._get_entry(path)
3705f9996aaSopenharmony_ci        if not outer_entry:
3715f9996aaSopenharmony_ci            return ()
3725f9996aaSopenharmony_ci        subentries = outer_entry.get('entries', [])
3735f9996aaSopenharmony_ci        return (entry['path'] for entry in subentries)
3745f9996aaSopenharmony_ci
3755f9996aaSopenharmony_ci    def _assert_not_queried(self):
3765f9996aaSopenharmony_ci        assert self._files_md5 is None
3775f9996aaSopenharmony_ci        assert self._strings_md5 is None
3785f9996aaSopenharmony_ci        assert self._file_map is None
3795f9996aaSopenharmony_ci
3805f9996aaSopenharmony_ci    def _get_entry(self, path, subpath=None):
3815f9996aaSopenharmony_ci        """Returns the JSON entry for the given path / subpath."""
3825f9996aaSopenharmony_ci        if self._file_map is None:
3835f9996aaSopenharmony_ci            self._file_map = {}
3845f9996aaSopenharmony_ci            for entry in self._files:
3855f9996aaSopenharmony_ci                self._file_map[(entry['path'], None)] = entry
3865f9996aaSopenharmony_ci                for subentry in entry.get('entries', ()):
3875f9996aaSopenharmony_ci                    self._file_map[(entry['path'],
3885f9996aaSopenharmony_ci                                    subentry['path'])] = subentry
3895f9996aaSopenharmony_ci        return self._file_map.get((path, subpath))
3905f9996aaSopenharmony_ci
3915f9996aaSopenharmony_ci
3925f9996aaSopenharmony_cidef _update_md5_for_file(md5, path, block_size=2**16):
3935f9996aaSopenharmony_ci    # record md5 of linkto for dead link.
3945f9996aaSopenharmony_ci    if os.path.islink(path):
3955f9996aaSopenharmony_ci        linkto = os.readlink(path)
3965f9996aaSopenharmony_ci        if not os.path.exists(linkto):
3975f9996aaSopenharmony_ci            md5.update(linkto.encode())
3985f9996aaSopenharmony_ci            return
3995f9996aaSopenharmony_ci
4005f9996aaSopenharmony_ci    with open(path, 'rb') as infile:
4015f9996aaSopenharmony_ci        while True:
4025f9996aaSopenharmony_ci            data = infile.read(block_size)
4035f9996aaSopenharmony_ci            if not data:
4045f9996aaSopenharmony_ci                break
4055f9996aaSopenharmony_ci            md5.update(data)
4065f9996aaSopenharmony_ci
4075f9996aaSopenharmony_ci
4085f9996aaSopenharmony_cidef _update_md5_for_directory(md5, dir_path):
4095f9996aaSopenharmony_ci    for root, _, files in os.walk(dir_path):
4105f9996aaSopenharmony_ci        for f in files:
4115f9996aaSopenharmony_ci            _update_md5_for_file(md5, os.path.join(root, f))
4125f9996aaSopenharmony_ci
4135f9996aaSopenharmony_ci
def _md5_for_path(path):
    """Returns the md5 hex digest of a file, or of all files in a directory."""
    digest = hashlib.md5()
    if not os.path.isdir(path):
        _update_md5_for_file(digest, path)
    else:
        _update_md5_for_directory(digest, path)
    return digest.hexdigest()
4215f9996aaSopenharmony_ci
4225f9996aaSopenharmony_ci
4235f9996aaSopenharmony_cidef _compute_inline_md5(iterable):
4245f9996aaSopenharmony_ci    """Computes the md5 of the concatenated parameters."""
4255f9996aaSopenharmony_ci    md5 = hashlib.md5()
4265f9996aaSopenharmony_ci    for item in iterable:
4275f9996aaSopenharmony_ci        md5.update(str(item).encode())
4285f9996aaSopenharmony_ci    return md5.hexdigest()
4295f9996aaSopenharmony_ci
4305f9996aaSopenharmony_ci
4315f9996aaSopenharmony_cidef _is_zip_file(path):
4325f9996aaSopenharmony_ci    """Returns whether to treat the given file as a zip file."""
4335f9996aaSopenharmony_ci    return path[-4:] in ('.zip')
4345f9996aaSopenharmony_ci
4355f9996aaSopenharmony_ci
4365f9996aaSopenharmony_cidef _extract_zip_entries(path):
4375f9996aaSopenharmony_ci    """Returns a list of (path, CRC32) of all files within |path|."""
4385f9996aaSopenharmony_ci    entries = []
4395f9996aaSopenharmony_ci    with zipfile.ZipFile(path) as zip_file:
4405f9996aaSopenharmony_ci        for zip_info in zip_file.infolist():
4415f9996aaSopenharmony_ci            # Skip directories and empty files.
4425f9996aaSopenharmony_ci            if zip_info.CRC:
4435f9996aaSopenharmony_ci                entries.append(
4445f9996aaSopenharmony_ci                    (zip_info.filename, zip_info.CRC + zip_info.compress_type))
4455f9996aaSopenharmony_ci    return entries
446