#!/usr/bin/env python
# SPDX-License-Identifier: GPL-2.0
# Copyright Thomas Gleixner <tglx@linutronix.de>

from argparse import ArgumentParser
from ply import lex, yacc
import locale
import traceback
import sys
import git
import re
import os

class ParserException(Exception):
    """Raised by the lexer/parser for an invalid SPDX license expression.

    tok is the offending ply token (or None when no token is available),
    txt is the human readable error description.
    """
    def __init__(self, tok, txt):
        # Hand the text to the base class so str(exception) is not empty
        super(ParserException, self).__init__(txt)
        self.tok = tok
        self.txt = txt

class SPDXException(Exception):
    """Raised while reading the LICENSES directory.

    el is the git tree element being read (or None), txt is the human
    readable error description.
    """
    def __init__(self, el, txt):
        # Hand the text to the base class so str(exception) is not empty
        super(SPDXException, self).__init__(txt)
        self.el = el
        self.txt = txt

class SPDXdata(object):
    """Container for the license information read from LICENSES."""
    def __init__(self):
        # Number of license / exception files which carry a License-Text: tag
        self.license_files = 0
        self.exception_files = 0
        # Upper-cased valid license identifiers
        self.licenses = [ ]
        # Upper-cased exception identifier -> list of license identifiers
        # the exception may be combined with
        self.exceptions = { }

# Read the spdx data from the LICENSES directory
def read_spdxdata(repo):
    """Collect license and exception identifiers from the LICENSES tree.

    repo is a git.Repo object. The entries are enumerated from the HEAD
    commit tree, but the file content is read from the working directory,
    so the paths are resolved relative to the current directory.

    Returns a populated SPDXdata object.

    Raises SPDXException on a duplicate license identifier, on an exception
    which references an unknown license and on an exception file which
    reaches License-Text: without any SPDX-Licenses.
    """

    # The subdirectories of LICENSES in the kernel source
    # Note: exceptions needs to be parsed as last directory.
    license_dirs = [ "preferred", "dual", "deprecated", "exceptions" ]
    lictree = repo.head.commit.tree['LICENSES']

    spdx = SPDXdata()

    for d in license_dirs:
        for el in lictree[d].traverse():
            if not os.path.isfile(el.path):
                continue

            exception = None
            # Context manager so the file is closed on break and on raise
            with open(el.path) as fd:
                for l in fd:
                    if l.startswith('Valid-License-Identifier:'):
                        lid = l.split(':')[1].strip().upper()
                        if lid in spdx.licenses:
                            raise SPDXException(el, 'Duplicate License Identifier: %s' %lid)
                        else:
                            spdx.licenses.append(lid)

                    elif l.startswith('SPDX-Exception-Identifier:'):
                        exception = l.split(':')[1].strip().upper()
                        spdx.exceptions[exception] = []

                    elif l.startswith('SPDX-Licenses:'):
                        for lic in l.split(':')[1].upper().strip().replace(' ', '').replace('\t', '').split(','):
                            if lic not in spdx.licenses:
                                raise SPDXException(None, 'Exception %s missing license %s' %(exception, lic))
                            spdx.exceptions[exception].append(lic)

                    elif l.startswith("License-Text:"):
                        if exception:
                            if not spdx.exceptions[exception]:
                                raise SPDXException(el, 'Exception %s is missing SPDX-Licenses' %exception)
                            spdx.exception_files += 1
                        else:
                            spdx.license_files += 1
                        # The tags precede the license text. Stop reading here.
                        break
    return spdx

class id_parser(object):
    """ply based lexer/parser for SPDX license expressions.

    The class attributes and the t_*/p_* methods are picked up by ply.
    The docstrings of the t_* and p_* methods are the token regular
    expressions and the grammar, so they must not be edited as if they
    were documentation.
    """

    reserved = [ 'AND', 'OR', 'WITH' ]
    tokens = [ 'LPAR', 'RPAR', 'ID', 'EXC' ] + reserved

    precedence = ( ('nonassoc', 'AND', 'OR'), )

    t_ignore = ' \t'

    def __init__(self, spdx):
        self.spdx = spdx
        # Type of the previously lexed token and the last seen license id.
        # Both are reset in parse() for every expression.
        self.lasttok = None
        self.lastid = None
        self.lexer = lex.lex(module = self, reflags = re.UNICODE)
        # Initialize the parser. No debug file and no parser rules stored on disk
        # The rules are small enough to be generated on the fly
        self.parser = yacc.yacc(module = self, write_tables = False, debug = False)
        # Statistics counters
        self.lines_checked = 0
        self.checked = 0
        self.spdx_valid = 0
        self.spdx_errors = 0
        self.curline = 0
        self.deepest = 0

    # Validate License and Exception IDs
    def validate(self, tok):
        # Compare case insensitive. The SPDX data is stored upper-cased.
        ident = tok.value.upper()
        if tok.type == 'ID':
            if ident not in self.spdx.licenses:
                raise ParserException(tok, 'Invalid License ID')
            self.lastid = ident
        elif tok.type == 'EXC':
            if ident not in self.spdx.exceptions:
                raise ParserException(tok, 'Invalid Exception ID')
            # The exception must be allowed for the license right before WITH
            if self.lastid not in self.spdx.exceptions[ident]:
                raise ParserException(tok, 'Exception not valid for license %s' %self.lastid)
            self.lastid = None
        elif tok.type != 'WITH':
            # WITH keeps the last license id alive for the exception check
            self.lastid = None

    # Lexer functions
    def t_RPAR(self, tok):
        r'\)'
        self.lasttok = tok.type
        return tok

    def t_LPAR(self, tok):
        r'\('
        self.lasttok = tok.type
        return tok

    def t_ID(self, tok):
        r'[A-Za-z.0-9\-+]+'

        # An identifier directly after an exception id is rejected
        if self.lasttok == 'EXC':
            print(tok)
            raise ParserException(tok, 'Missing parentheses')

        tok.value = tok.value.strip()
        val = tok.value.upper()

        # Retype the generic ID token: reserved words become their own
        # token type, the identifier following WITH is an exception id
        if val in self.reserved:
            tok.type = val
        elif self.lasttok == 'WITH':
            tok.type = 'EXC'

        self.lasttok = tok.type
        self.validate(tok)
        return tok

    def t_error(self, tok):
        raise ParserException(tok, 'Invalid token')

    def p_expr(self, p):
        '''expr : ID
                | ID WITH EXC
                | expr AND expr
                | expr OR expr
                | LPAR expr RPAR'''
        pass

    def p_error(self, p):
        # ply hands in None when the input ends in the middle of an expression
        if not p:
            raise ParserException(None, 'Unfinished license expression')
        else:
            raise ParserException(p, 'Syntax error')

    def parse(self, expr):
        """Parse a single license expression string.

        Raises ParserException when the expression is not valid.
        """
        self.lasttok = None
        self.lastid = None
        self.parser.parse(expr, lexer = self.lexer)

    def parse_lines(self, fd, maxlines, fname):
        """Check the first SPDX-License-Identifier found in a file.

        fd is an iterable of byte strings (a file opened in binary mode),
        maxlines limits the number of lines which are inspected and fname
        is only used for the error report.

        Parse errors are written to stdout and counted in spdx_errors.
        They are not propagated to the caller.
        """
        self.checked += 1
        self.curline = 0
        # Loop invariant. Look it up once per file and not once per line.
        encoding = locale.getpreferredencoding(False)
        try:
            for line in fd:
                line = line.decode(encoding, errors='ignore')
                self.curline += 1
                if self.curline > maxlines:
                    break
                self.lines_checked += 1
                if line.find("SPDX-License-Identifier:") < 0:
                    continue
                expr = line.split(':')[1].strip()
                # Remove trailing comment closure
                if line.strip().endswith('*/'):
                    expr = expr.rstrip('*/').strip()
                # Remove trailing xml comment closure
                if line.strip().endswith('-->'):
                    expr = expr.rstrip('-->').strip()
                # Special case for SH magic boot code files
                if line.startswith('LIST \"'):
                    expr = expr.rstrip('\"').strip()
                self.parse(expr)
                self.spdx_valid += 1
                #
                # Should we check for more SPDX ids in the same file and
                # complain if there are any?
                #
                break

        except ParserException as pe:
            if pe.tok:
                col = line.find(expr) + pe.tok.lexpos
                tok = pe.tok.value
                sys.stdout.write('%s: %d:%d %s: %s\n' %(fname, self.curline, col, pe.txt, tok))
            else:
                # No token, so no column information. The column is
                # reported as 0 by the format string itself.
                sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, pe.txt))
            self.spdx_errors += 1

def scan_git_tree(tree):
    """Check every file below a git tree object.

    Uses the module level 'parser' and 'args' objects which are set up in
    the __main__ section.
    """
    for el in tree.traverse():
        # Exclude stuff which would make pointless noise
        # FIXME: Put this somewhere more sensible
        if el.path.startswith("LICENSES"):
            continue
        if el.path.find("license-rules.rst") >= 0:
            continue
        if not os.path.isfile(el.path):
            continue
        with open(el.path, 'rb') as fd:
            parser.parse_lines(fd, args.maxlines, el.path)

def scan_git_subtree(tree, path):
    """Descend from tree along the components of path and scan that subtree."""
    for p in path.strip('/').split('/'):
        tree = tree[p]
    scan_git_tree(tree)

if __name__ == '__main__':

    ap = ArgumentParser(description='SPDX expression checker')
    ap.add_argument('path', nargs='*', help='Check path or file. If not given full git tree scan. For stdin use "-"')
    ap.add_argument('-m', '--maxlines', type=int, default=15,
                    help='Maximum number of lines to scan in a file. Default 15')
    ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output')
    args = ap.parse_args()

    # Sanity check path arguments
    if '-' in args.path and len(args.path) > 1:
        sys.stderr.write('stdin input "-" must be the only path argument\n')
        sys.exit(1)

    try:
        # Use git to get the valid license expressions
        repo = git.Repo(os.getcwd())
        assert not repo.bare

        # Initialize SPDX data
        spdx = read_spdxdata(repo)

        # Initialize the parser
        parser = id_parser(spdx)

    except SPDXException as se:
        if se.el:
            sys.stderr.write('%s: %s\n' %(se.el.path, se.txt))
        else:
            sys.stderr.write('%s\n' %se.txt)
        sys.exit(1)

    except Exception as ex:
        sys.stderr.write('FAIL: %s\n' %ex)
        sys.stderr.write('%s\n' %traceback.format_exc())
        sys.exit(1)

    try:
        if len(args.path) and args.path[0] == '-':
            stdin = os.fdopen(sys.stdin.fileno(), 'rb')
            parser.parse_lines(stdin, args.maxlines, '-')
        else:
            if args.path:
                for p in args.path:
                    if os.path.isfile(p):
                        # Context manager so the file does not stay open
                        with open(p, 'rb') as fd:
                            parser.parse_lines(fd, args.maxlines, p)
                    elif os.path.isdir(p):
                        scan_git_subtree(repo.head.reference.commit.tree, p)
                    else:
                        sys.stderr.write('path %s does not exist\n' %p)
                        sys.exit(1)
            else:
                # Full git tree scan
                scan_git_tree(repo.head.commit.tree)

            if args.verbose:
                sys.stderr.write('\n')
                sys.stderr.write('License files: %12d\n' %spdx.license_files)
                sys.stderr.write('Exception files: %12d\n' %spdx.exception_files)
                sys.stderr.write('License IDs %12d\n' %len(spdx.licenses))
                sys.stderr.write('Exception IDs %12d\n' %len(spdx.exceptions))
                sys.stderr.write('\n')
                sys.stderr.write('Files checked: %12d\n' %parser.checked)
                sys.stderr.write('Lines checked: %12d\n' %parser.lines_checked)
                sys.stderr.write('Files with SPDX: %12d\n' %parser.spdx_valid)
                sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors)

        sys.exit(0)

    except Exception as ex:
        sys.stderr.write('FAIL: %s\n' %ex)
        sys.stderr.write('%s\n' %traceback.format_exc())
        sys.exit(1)