"""Validate README.OpenSource metadata files found in a project tree.

Each README.OpenSource file is expected to hold a JSON array of objects, one
per bundled open-source component.  Two independent checks are offered:

* format validation  - the file parses and every component carries all
  ``REQUIRED_FIELDS``;
* content validation - selected fields match a reference data set and the
  referenced license file exists next to the README.
"""
import argparse
import json
import logging
import os
import sys
from typing import Any, Dict, List, Optional, Sequence

logger = logging.getLogger(__name__)

REQUIRED_FIELDS = [
    "Name",
    "License",
    "License File",
    "Version Number",
    "Owner",
    "Upstream URL",
    "Description",
]

# Name of the metadata file searched for in every directory.
_README_FILENAME = "README.OpenSource"

# Fields whose values must be identical to the matching reference entry.
_COMPARED_FIELDS = ("Name", "License", "Version Number", "Upstream URL")

# Errors that reading and parsing a JSON file can realistically raise:
# OSError (missing/unreadable file), ValueError (covers JSONDecodeError and
# UnicodeDecodeError) and RecursionError (pathologically nested documents).
_LOAD_ERRORS = (OSError, ValueError, RecursionError)

_LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"


def _load_json(path: str) -> Any:
    """Return the parsed JSON document stored at *path* (UTF-8 encoded)."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


class OpenSourceValidator:
    """Locate and validate README.OpenSource files below a project root."""

    def __init__(
        self,
        project_root: str,
        log_file: Optional[str] = None,
        reference_data: Optional[List[Dict[str, str]]] = None,
    ):
        """Store the configuration and set up logging.

        Args:
            project_root: Directory that is searched recursively.
            log_file: Optional path of a log file; when omitted, the default
                ``logging.basicConfig`` destination is used.
            reference_data: Optional list of reference component entries used
                by content validation.
        """
        self.project_root = project_root
        self.reference_data = reference_data or []
        self.log_file = log_file

        # Configure logging.  Note that logging.basicConfig does nothing when
        # the root logger already has handlers attached.
        log_config: Dict[str, Any] = {"level": logging.INFO, "format": _LOG_FORMAT}
        if self.log_file:
            log_config["filename"] = self.log_file
        logging.basicConfig(**log_config)

    def find_all_readmes(self) -> List[str]:
        """Recursively collect the paths of all README.OpenSource files."""
        return [
            os.path.join(dirpath, _README_FILENAME)
            for dirpath, _, filenames in os.walk(self.project_root)
            if _README_FILENAME in filenames
        ]

    def validate_format(self, readme_path: str) -> bool:
        """Check that a README.OpenSource file is well formed.

        The file must contain a JSON array whose elements are objects that
        carry every name listed in ``REQUIRED_FIELDS``.  Every problem found
        is logged as an error, including read and parse failures.

        Args:
            readme_path: Path of the README.OpenSource file.

        Returns:
            True when no problem was found, False otherwise.
        """
        errors: List[str] = []
        try:
            data = _load_json(readme_path)
        except json.JSONDecodeError as e:
            errors.append(f"JSON decode error: {e}")
        except _LOAD_ERRORS as e:
            errors.append(f"Unexpected error: {e}")
        else:
            errors.extend(self._collect_format_errors(data))

        if errors:
            for error in errors:
                logger.error("%s: %s", readme_path, error)
            return False

        logger.info("%s format is valid.", readme_path)
        return True

    @staticmethod
    def _collect_format_errors(data: Any) -> List[str]:
        """Return one message per structural problem in parsed README data."""
        if not isinstance(data, list):
            return ["The file does not contain a JSON array."]

        errors: List[str] = []
        for idx, component in enumerate(data, start=1):
            # A non-object element cannot carry named fields at all; report
            # it once instead of testing membership on a str/int/list.
            if not isinstance(component, dict):
                errors.append(f"Component {idx} is not a JSON object.")
                continue
            errors.extend(
                f"Component {idx} is missing required field: {field}"
                for field in REQUIRED_FIELDS
                if field not in component
            )
        return errors

    def load_reference_data(self, reference_data_path: str):
        """Load the reference data from a JSON file.

        Args:
            reference_data_path: Path of a JSON file holding an array of
                reference component entries.

        Raises:
            ValueError: If the file cannot be read or parsed, or if it does
                not contain a JSON array.
        """
        try:
            data = _load_json(reference_data_path)
        except _LOAD_ERRORS as e:
            raise ValueError(
                f"Failed to load reference data from {reference_data_path}: {e}"
            ) from e

        if not isinstance(data, list):
            raise ValueError(
                f"Failed to load reference data from {reference_data_path}: "
                "expected a JSON array."
            )
        self.reference_data = data

    def find_reference_data(self, name: str) -> Optional[Dict[str, str]]:
        """Return the reference entry whose "Name" equals *name*, or None."""
        for reference in self.reference_data:
            # Skip malformed (non-object) entries instead of crashing on them.
            if isinstance(reference, dict) and reference.get("Name") == name:
                return reference
        return None

    def validate_content(self, readme_path: str) -> bool:
        """Compare a README.OpenSource file against the reference data.

        Every component must be found in the reference data by name, agree
        with it on the compared fields, and point to an existing license file.
        All problems are logged as errors.

        Args:
            readme_path: Path of the README.OpenSource file.

        Returns:
            True when every component passed, False otherwise (including when
            the file cannot be read or parsed).
        """
        # Step 1: read the JSON file.
        try:
            readme_data = _load_json(readme_path)
        except json.JSONDecodeError as e:
            logger.error("%s: JSON decode error: %s", readme_path, e)
            return False
        except _LOAD_ERRORS as e:
            logger.error("%s: Unexpected error: %s", readme_path, e)
            return False

        if not isinstance(readme_data, list):
            logger.error("%s: JSON data is not an array.", readme_path)
            return False

        # Step 2: validate every component entry.  A list is built (rather
        # than short-circuiting) so that all entries get checked and logged.
        results = [
            self._validate_component(readme_path, software_data)
            for software_data in readme_data
        ]
        all_valid = all(results)

        if all_valid:
            logger.info("%s: Content validation passed.", readme_path)
        else:
            logger.error("%s: Content validation failed.", readme_path)
        return all_valid

    def _validate_component(self, readme_path: str, software_data: Any) -> bool:
        """Validate a single component entry against the reference data."""
        if not isinstance(software_data, dict):
            logger.error("%s: Software entry is not a JSON object.", readme_path)
            return False

        name = software_data.get("Name")
        if not name:
            logger.error("%s: Missing 'Name' field in software data.", readme_path)
            return False

        reference = self.find_reference_data(name)
        if reference is None:
            logger.error(
                "%s: Software '%s' not found in reference data.", readme_path, name
            )
            return False

        valid = True
        for field in _COMPARED_FIELDS:
            expected_value = reference.get(field)
            actual_value = software_data.get(field)
            if actual_value != expected_value:
                logger.error(
                    "%s: Field '%s' mismatch for '%s'. Expected: '%s', Found: '%s'",
                    readme_path,
                    field,
                    name,
                    expected_value,
                    actual_value,
                )
                valid = False

        # The "License File" path must point to something that exists.
        if not self.validate_license_file(
            readme_path, software_data.get("License File")
        ):
            valid = False
        return valid

    def validate_license_file(
        self, readme_path: str, license_file: Optional[str]
    ) -> bool:
        """Check that the license file exists.

        Args:
            readme_path: Path of the README.OpenSource file.
            license_file: Value of the "License File" field; resolved relative
                to the directory containing *readme_path*.

        Returns:
            True when the resolved path exists, False otherwise.
        """
        if not license_file:
            logger.error("%s: 'License File' field is missing.", readme_path)
            return False
        if not isinstance(license_file, str):
            # os.path.join would raise TypeError on e.g. a number or a list.
            logger.error("%s: 'License File' field must be a string.", readme_path)
            return False

        license_file_path = os.path.join(os.path.dirname(readme_path), license_file)
        if not os.path.exists(license_file_path):
            logger.error(
                "%s: License file '%s' not found at: %s",
                readme_path,
                license_file,
                license_file_path,
            )
            return False

        logger.info("%s: License file '%s' exists.", readme_path, license_file)
        return True

    def run_validation(
        self, validate_format: bool = True, validate_content: bool = False
    ) -> bool:
        """Run the selected checks on every README.OpenSource file found.

        Args:
            validate_format: Run format validation on each file.
            validate_content: Run content validation on each file; skipped for
                a file whose format validation failed.

        Returns:
            True when at least one file was found and every requested check
            passed; False otherwise.
        """
        try:
            readme_paths = self.find_all_readmes()
            if not readme_paths:
                logger.error(
                    "No README.OpenSource files found in the project directory."
                )
                return False

            all_passed = True
            for readme_path in readme_paths:
                logger.info("Validating: %s", readme_path)
                if validate_format and not self.validate_format(readme_path):
                    logger.error("%s: Format validation failed.", readme_path)
                    all_passed = False
                    continue  # Skip content validation when the format is bad.
                # validate_content logs its own pass/fail summary.
                if validate_content and not self.validate_content(readme_path):
                    all_passed = False

            logger.info("Validation process completed.")
            return all_passed
        except Exception as e:  # Top-level boundary: report and signal failure.
            logger.exception("Validation failed: %s", e)
            return False


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        0 when validation passed, 1 when it failed or the reference data
        could not be loaded.
    """
    parser = argparse.ArgumentParser(
        description="Validate README.OpenSource files in a project."
    )
    parser.add_argument("project_root", help="The root directory of the project.")
    parser.add_argument(
        "--validate-format",
        action="store_true",
        help="Validate the format of README.OpenSource files.",
    )
    parser.add_argument(
        "--validate-content",
        action="store_true",
        help="Validate the content of README.OpenSource files against reference data.",
    )
    parser.add_argument(
        "--reference-data",
        help="Path to the reference data JSON file (required for content validation).",
    )
    parser.add_argument(
        "--log-file", help="Path to the log file for validation results."
    )

    args = parser.parse_args(argv)

    if args.validate_content and not args.reference_data:
        parser.error("--reference-data is required for content validation.")

    # Create the validator.
    validator = OpenSourceValidator(
        project_root=args.project_root,
        log_file=args.log_file,
    )

    if args.validate_content:
        # Load the reference data from the configuration file.
        try:
            validator.load_reference_data(args.reference_data)
        except ValueError as e:
            logger.error("%s", e)
            return 1

    # Run the checks.  Format validation is the default when no check was
    # requested explicitly.
    passed = validator.run_validation(
        validate_format=args.validate_format or not args.validate_content,
        validate_content=args.validate_content,
    )
    return 0 if passed else 1


if __name__ == "__main__":
    sys.exit(main())