# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Checks Java files for illegal imports."""



import codecs
import os
import re

import results
from rules import Rule


class JavaChecker(object):
  """Import checker for Java files.

  The CheckFile method uses real filesystem paths, but Java imports work in
  terms of package names. To deal with this, we have an extra "prescan" pass
  that reads all the .java files and builds a mapping of class name -> filepath.
  In CheckFile, we convert each import statement into a real filepath, and check
  that against the rules in the DEPS files.

  Note that in Java you can always use classes in the same directory without an
  explicit import statement, so these imports can't be blocked with DEPS files.
  But that shouldn't be a problem, because same-package imports are pretty much
  always correct by definition. (If we find a case where this is *not* correct,
  it probably means the package is too big and needs to be split up.)

  Properties:
    _classmap: dict of fully-qualified Java class name -> filepath
  """

  EXTENSIONS = ['.java']

  # This regular expression will be used to extract filenames from import
  # statements.
  _EXTRACT_IMPORT_PATH = re.compile(r'^import\s+(?:static\s+)?([\w\.]+)\s*;')

  # Extracts the package name from a package declaration. Like the import
  # pattern above, whitespace is tolerated before the terminating semicolon.
  _EXTRACT_PACKAGE_PATH = re.compile(r'^package\s+([\w\.]+)\s*;')

  def __init__(self, base_directory, verbose, added_imports=None,
               allow_multiple_definitions=None):
    """Creates the checker and, if imports were supplied, runs the prescan.

    Args:
      base_directory: root directory that is walked during the prescan and
        that import paths are made relative to in CheckLine.
      verbose: when true, progress and extra warnings are printed.
      added_imports: iterable of (file_path, import_lines) pairs. The prescan
        only runs when this is non-empty; otherwise _classmap stays empty and
        CheckLine treats every import as coming from outside the tree.
      allow_multiple_definitions: regular expressions that are matched (with
        re.match) against file paths; a matching path suppresses the
        "multiple definitions" warning.
    """
    self._base_directory = base_directory
    self._verbose = verbose
    self._classmap = {}
    self._allow_multiple_definitions = allow_multiple_definitions or []
    if added_imports:
      added_classset = self._PrescanImportFiles(added_imports)
      self._PrescanFiles(added_classset)

  def _GetClassFullName(self, filepath):
    """Get the full class name of a file with package name.

    The class name is the file's base name without its extension, prefixed by
    the package from the first package declaration found in the file.

    Args:
      filepath: path of the Java source file to read.

    Return:
      '<package>.<ClassName>', or None if filepath is not a regular file or
      contains no package declaration.
    """
    if not os.path.isfile(filepath):
      return None
    short_class_name, _ = os.path.splitext(os.path.basename(filepath))
    with codecs.open(filepath, encoding='utf-8') as f:
      for line in f:
        found_item = self._EXTRACT_PACKAGE_PATH.match(line)
        if found_item:
          return found_item.group(1) + '.' + short_class_name
    return None

  def _IgnoreDir(self, d):
    """Returns True if the directory named d should be skipped by the prescan.

    Args:
      d: a directory name (a single path component, as yielded by os.walk).
    """
    # Skip hidden directories.
    if d.startswith('.'):
      return True
    # Skip the "out" directory, as dealing with generated files is awkward.
    # We don't want paths like "out/Release/lib.java" in our DEPS files.
    # TODO(husky): We need some way of determining the "real" path to
    # a generated file -- i.e., where it would be in source control if
    # it weren't generated.
    if d.startswith('out') or d in ('xcodebuild', 'AndroidStudioDefault',
                                    'libassistant',):
      return True
    # Skip third-party directories.
    if d in ('third_party', 'ThirdParty'):
      return True
    return False

  def _PrescanFiles(self, added_classset):
    """Walks the base directory and records every Java file in _classmap.

    Args:
      added_classset: set of fully-qualified class names; forwarded to
        _PrescanFile, which uses it to decide whether to warn on duplicates.
    """
    for root, dirs, files in os.walk(self._base_directory):
      # Skip unwanted subdirectories. TODO(husky): it would be better to do
      # this via the skip_child_includes flag in DEPS files. Maybe hoist this
      # prescan logic into checkdeps.py itself?
      # Modify dirs in-place with slice assignment to avoid recursing into them.
      dirs[:] = [d for d in dirs if not self._IgnoreDir(d)]
      for f in files:
        # Use the same extension test as ShouldCheck so both stay in sync with
        # EXTENSIONS.
        if self.IsJavaFile(f):
          self._PrescanFile(os.path.join(root, f), added_classset)

  def _PrescanImportFiles(self, added_imports):
    """Build a set of fully-qualified class affected by this patch.

    Prescan imported files and build classset to collect full class names
    with package name. This includes both changed files as well as changed
    imports.

    Args:
      added_imports : ((file_path, (import_line, import_line, ...), ...)

    Return:
      A set of full class names with package name of imported files.
    """
    classset = set()
    for filepath, changed_lines in (added_imports or []):
      if not self.ShouldCheck(filepath):
        continue
      full_class_name = self._GetClassFullName(filepath)
      if full_class_name:
        classset.add(full_class_name)
      for line in changed_lines:
        found_item = self._EXTRACT_IMPORT_PATH.match(line)
        if found_item:
          classset.add(found_item.group(1))
    return classset

  def _PrescanFile(self, filepath, added_classset):
    """Records the class defined by filepath in _classmap.

    If the class is already mapped, a warning is printed when running verbose
    or when the class is in added_classset, unless filepath matches one of the
    allow_multiple_definitions patterns. The existing mapping is only replaced
    when it points below the 'clank' directory of the base directory.

    Args:
      filepath: path of the Java source file to record.
      added_classset: set of fully-qualified class names for which duplicate
        definitions should be reported even when not verbose.
    """
    if self._verbose:
      print('Prescanning: ' + filepath)
    full_class_name = self._GetClassFullName(filepath)
    if not full_class_name:
      if self._verbose:
        print('WARNING: no package definition found in %s' % filepath)
      return

    existing_path = self._classmap.get(full_class_name)
    if existing_path is None:
      self._classmap[full_class_name] = filepath
      return

    should_report = self._verbose or full_class_name in added_classset
    if should_report and not any(
        re.match(i, filepath) for i in self._allow_multiple_definitions):
      print('WARNING: multiple definitions of %s:' % full_class_name)
      print('    ' + filepath)
      print('    ' + existing_path)
      print()
    # Prefer the public repo when multiple matches are found.
    if existing_path.startswith(os.path.join(self._base_directory, 'clank')):
      self._classmap[full_class_name] = filepath

  def CheckLine(self, rules, line, filepath, fail_on_temp_allow=False):
    """Checks the given line with the given rule set.

    Args:
      rules: rule set object; its RuleApplyingTo(import_path, filepath) method
        is used to look up the rule for the imported file.
      line: a single line of Java source.
      filepath: path of the file that contains the line.
      fail_on_temp_allow: when true, a Rule.TEMP_ALLOW rule is reported as a
        violation in addition to Rule.DISALLOW.

    Returns a tuple (is_import, dependency_violation) where
    is_import is True only if the line is an import
    statement, and dependency_violation is an instance of
    results.DependencyViolation if the line violates a rule, or None
    if it does not.
    """
    found_item = self._EXTRACT_IMPORT_PATH.match(line)
    if not found_item:
      return False, None  # Not a match
    clazz = found_item.group(1)
    if clazz not in self._classmap:
      # Importing a class from outside the Chromium tree. That's fine --
      # it's probably a Java or Android system class.
      return True, None
    import_path = os.path.relpath(
        self._classmap[clazz], self._base_directory)
    # Convert Windows paths to Unix style, as used in DEPS files.
    import_path = import_path.replace(os.path.sep, '/')
    rule = rules.RuleApplyingTo(import_path, filepath)
    if (rule.allow == Rule.DISALLOW or
        (fail_on_temp_allow and rule.allow == Rule.TEMP_ALLOW)):
      return True, results.DependencyViolation(import_path, rule, rules)
    return True, None

  def CheckFile(self, rules, filepath):
    """Checks every import in the header of filepath against rules.

    Args:
      rules: rule set object passed through to CheckLine.
      filepath: path of the Java source file to check.

    Return:
      A results.DependeeStatus for filepath holding one violation for each
      import line that CheckLine reported.
    """
    if self._verbose:
      print('Checking: ' + filepath)

    dependee_status = results.DependeeStatus(filepath)
    with codecs.open(filepath, encoding='utf-8') as f:
      for line in f:
        _, violation = self.CheckLine(rules, line, filepath)
        if violation:
          dependee_status.AddViolation(violation)
        # NOTE: this is a plain substring test, so a '{' anywhere in the line
        # (including inside a comment) also ends the scan.
        if '{' in line:
          # This is code, so we're finished reading imports for this file.
          break

    return dependee_status

  @staticmethod
  def IsJavaFile(filepath):
    """Returns True if the given path ends in the extensions
    handled by this checker.
    """
    return os.path.splitext(filepath)[1] in JavaChecker.EXTENSIONS

  def ShouldCheck(self, file_path):
    """Check if the new import file path should be presubmit checked.

    Args:
      file_path: file path to be checked

    Return:
      bool: True if the file should be checked; False otherwise.
    """
    return self.IsJavaFile(file_path)
198