Commit 0509b270 authored by Thomas Gleixner's avatar Thomas Gleixner Committed by Greg Kroah-Hartman
Browse files

scripts/spdxcheck: Put excluded files and directories into a separate file



The files and directories which are excluded from scanning are currently
hard coded in the script. That's not maintainable and not accessible for
external tools.

Move the files and directories which should be excluded into a file.  The
default file is scripts/spdxexclude. This can be overridden with the
'-e $FILE' command line option.

The file format and syntax is similar to the .gitignore file.

Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 67924b71
Loading
Loading
Loading
Loading
+64 −6
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ from argparse import ArgumentParser
from ply import lex, yacc
import locale
import traceback
import fnmatch
import sys
import git
import re
@@ -106,6 +107,7 @@ class id_parser(object):
        self.parser = yacc.yacc(module = self, write_tables = False, debug = False)
        self.lines_checked = 0
        self.checked = 0
        self.excluded = 0
        self.spdx_valid = 0
        self.spdx_errors = 0
        self.spdx_dirs = {}
@@ -254,17 +256,47 @@ class id_parser(object):
        di.update(fname, base, fail)
        self.spdx_dirs[base] = di

class pattern(object):
    def __init__(self, line):
        self.pattern = line
        self.match = self.match_file
        if line == '.*':
            self.match = self.match_dot
        elif line.endswith('/'):
            self.pattern = line[:-1]
            self.match = self.match_dir
        elif line.startswith('/'):
            self.pattern = line[1:]
            self.match = self.match_fn

    def match_dot(self, fpath):
        return os.path.basename(fpath).startswith('.')

    def match_file(self, fpath):
        return os.path.basename(fpath) == self.pattern

    def match_fn(self, fpath):
        return fnmatch.fnmatchcase(fpath, self.pattern)

    def match_dir(self, fpath):
        if self.match_fn(os.path.dirname(fpath)):
            return True
        return fpath.startswith(self.pattern)

def exclude_file(fpath):
    for rule in exclude_rules:
        if rule.match(fpath):
            return True
    return False

def scan_git_tree(tree, basedir, dirdepth):
    parser.set_dirinfo(basedir, dirdepth)
    for el in tree.traverse():
        # Exclude stuff which would make pointless noise
        # FIXME: Put this somewhere more sensible
        if el.path.startswith("LICENSES"):
            continue
        if el.path.find("license-rules.rst") >= 0:
            continue
        if not os.path.isfile(el.path):
            continue
        if exclude_file(el.path):
            parser.excluded += 1
            continue
        with open(el.path, 'rb') as fd:
            parser.parse_lines(fd, args.maxlines, el.path)

@@ -273,6 +305,20 @@ def scan_git_subtree(tree, path, dirdepth):
        tree = tree[p]
    scan_git_tree(tree, path.strip('/'), dirdepth)

def read_exclude_file(fname):
    rules = []
    if not fname:
        return rules
    with open(fname) as fd:
        for line in fd:
            line = line.strip()
            if line.startswith('#'):
                continue
            if not len(line):
                continue
            rules.append(pattern(line))
    return rules

if __name__ == '__main__':

    ap = ArgumentParser(description='SPDX expression checker')
@@ -281,6 +327,8 @@ if __name__ == '__main__':
                    help='Show [sub]directory statistics.')
    ap.add_argument('-D', '--depth', type=int, default=-1,
                    help='Directory depth for -d statistics. Default: unlimited')
    ap.add_argument('-e', '--exclude',
                    help='File containing file patterns to exclude. Default: scripts/spdxexclude')
    ap.add_argument('-f', '--files', action='store_true',
                    help='Show files without SPDX.')
    ap.add_argument('-m', '--maxlines', type=int, default=15,
@@ -316,6 +364,15 @@ if __name__ == '__main__':
        sys.stderr.write('%s\n' %traceback.format_exc())
        sys.exit(1)

    try:
        fname = args.exclude
        if not fname:
            fname = os.path.join(os.path.dirname(__file__), 'spdxexclude')
        exclude_rules = read_exclude_file(fname)
    except Exception as ex:
        sys.stderr.write('FAIL: Reading exclude file %s: %s\n' %(fname, ex))
        sys.exit(1)

    try:
        if len(args.path) and args.path[0] == '-':
            stdin = os.fdopen(sys.stdin.fileno(), 'rb')
@@ -349,6 +406,7 @@ if __name__ == '__main__':
                sys.stderr.write('License IDs        %12d\n' %len(spdx.licenses))
                sys.stderr.write('Exception IDs      %12d\n' %len(spdx.exceptions))
                sys.stderr.write('\n')
                sys.stderr.write('Files excluded:    %12d\n' %parser.excluded)
                sys.stderr.write('Files checked:     %12d\n' %parser.checked)
                sys.stderr.write('Lines checked:     %12d\n' %parser.lines_checked)
                if parser.checked:

scripts/spdxexclude

0 → 100644
+8 −0
Original line number Diff line number Diff line
# SPDX-License-Identifier: GPL-2.0
#
# Patterns for excluding files and directories

# Ignore the license directory and the licensing documentation which would
# create lots of noise for no value
LICENSES/
license-rules.rst