forked from Imagelibrary/binutils-gdb
[gdb/contrib] Add script to license check new files
While reading through gdb-patches backlog after a return from PTO, I noticed that a newly added file was licensed with "MIT", and that license was not listed in Fedora's gdb.spec file. [Fedora no longer supports "effective" licenses.] That led me to this simple script which generates a list of all the newly added files between two given commits and scans these files for licenses. Example usage: bash$ cd /path/to/binutils-gdb/gdb bash$ ./contrib/license-check-new-files.sh -s gdb-15-branchpoint gdb-16-branchpoint Scanning directories gdb*/... gdb/contrib/common-misspellings.txt: no longer in repo? gdb/contrib/spellcheck.sh: no longer in repo? gdbsupport/unordered_dense.h: MIT I don't think anything in here is Fedora- or RPM-specific, so I'd like to submit this for consideration for inclusion in contrib/. I believe other distros may find it useful. Approved-By: Tom Tromey <tom@tromey.com>
This commit is contained in:
149
gdb/contrib/license-check-new-files.sh
Executable file
149
gdb/contrib/license-check-new-files.sh
Executable file
@@ -0,0 +1,149 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (C) 2025 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is part of GDB.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# This program requires the python modules GitPython (git) and scancode-toolkit.
|
||||
# It builds a list of all the newly added files to the repository and scans
|
||||
# each file for a license, printing it to the terminal. If "--skip" is used,
|
||||
# it will only output non-"common" licenses, e.g., omitting "GPL-3.0-or-later".
|
||||
# This makes it a little bit easier to detect any possible new licenses.
|
||||
#
|
||||
# Example:
|
||||
# bash$ cd /path/to/binutils-gdb/gdb
|
||||
# bash$ ./contrib/license-check-new-files.sh -s gdb-15-branchpoint gdb-16-branchpoint
|
||||
# Scanning directories gdb*/...
|
||||
# gdb/contrib/common-misspellings.txt: no longer in repo?
|
||||
# gdb/contrib/spellcheck.sh: no longer in repo?
|
||||
# gdbsupport/unordered_dense.h: MIT
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import PurePath
|
||||
from git import Repo
|
||||
from scancode import api
|
||||
|
||||
# A list of "common" licenses.  If "--skip" is used, any file whose
# detected SPDX license expression is in this list will be omitted
# from the output.
COMMON_LICENSES = ["GPL-2.0-or-later", "GPL-3.0-or-later"]

# Default list of directories to scan.  Default scans are limited to
# gdb-specific git directories because much of the rest of binutils-gdb
# is actually owned by other projects/packages.  This is a single
# string; the "--paths" value is split on commas to obtain the
# individual paths handed to git.
DEFAULT_SCAN_DIRS = "gdb*"
|
||||
|
||||
|
||||
# Get the commit object associated with the string commit CSTR
|
||||
# from the git repository REPO.
|
||||
#
|
||||
# Returns the object or prints an error and exits.
|
||||
def get_commit(repo, cstr):
    """Return the commit object named by CSTR in the repository REPO.

    REPO only needs to provide a ``commit(rev)`` method.  If the lookup
    raises an ordinary exception, an "unknown commit" message is printed
    and the program exits with status 2.
    """
    try:
        return repo.commit(cstr)
    except Exception:
        # Deliberately not a bare "except:": KeyboardInterrupt and
        # SystemExit must propagate instead of being misreported as an
        # unknown commit.
        print(f'unknown commit "{cstr}"')
        sys.exit(2)
|
||||
|
||||
|
||||
# Uses scancode-toolkit package to scan FILE's licenses.
|
||||
# Returns the full license dict from scancode on success or
|
||||
# propagates any exceptions.
|
||||
def get_licenses_for_file(file):
    """Scan FILE for licenses using the scancode-toolkit API.

    The result of ``api.get_licenses`` is returned unchanged; any
    exception raised by the scan propagates to the caller.
    """
    license_info = api.get_licenses(file)
    return license_info
|
||||
|
||||
|
||||
# Helper function to print FILE to the terminal if skipping
|
||||
# common licenses.
|
||||
def skip_print_file(skip, file):
    """Print "FILE: " (without a trailing newline) when SKIP is true.

    When SKIP is false nothing is printed.  The caller prints its own
    "checking licenses for ..." prefix in that case.
    """
    if not skip:
        return
    print(f"{file}: ", end="")
|
||||
|
||||
|
||||
def main(argv):
    """Scan files added between two commits and print their licenses.

    ARGV is the full argument vector with the program name first (as in
    sys.argv).  It must name a "from" and a "to" commit; "-s/--skip"
    suppresses files whose license is in COMMON_LICENSES, and
    "-p/--paths" gives a comma-separated list of paths to restrict the
    diff to (default DEFAULT_SCAN_DIRS).

    Exits with status 2 if no git repository is found in the current
    directory or any of its parents, or if either commit is unknown.
    """
    # Imported here because only the repository lookup below needs them.
    from git.exc import InvalidGitRepositoryError, NoSuchPathError

    parser = argparse.ArgumentParser()
    parser.add_argument("from_commit")
    parser.add_argument("to_commit")
    parser.add_argument(
        "-s", "--skip", help="skip common licenses in output", action="store_true"
    )
    parser.add_argument(
        "-p",
        "--paths",
        help=f'paths to scan (default is "{DEFAULT_SCAN_DIRS}")',
        type=str,
        default=DEFAULT_SCAN_DIRS,
    )
    # Parse the vector we were handed rather than implicitly re-reading
    # sys.argv, so that main() honours its argument.
    args = parser.parse_args(argv[1:])

    # Find the git repository.  Try the current directory, then walk up
    # towards the root.  The for/else reports failure only when no
    # candidate opened, which also handles a current directory that is
    # the filesystem root (no parents at all).
    cwd = PurePath(os.getcwd())
    repo = None
    repo_dir = None
    for candidate in (cwd, *cwd.parents):
        try:
            repo = Repo(candidate)
        except (InvalidGitRepositoryError, NoSuchPathError):
            continue
        repo_dir = candidate
        break
    else:
        print(
            f'not a git repository (or any parent up to mount point "{candidate}")'
        )
        sys.exit(2)

    # Commit boundaries to search for new files.
    from_obj = get_commit(repo, args.from_commit)
    to_obj = get_commit(repo, args.to_commit)

    # Loop over the files added ("A") between the two commits.
    scan_paths = args.paths.split(",")
    print(f'Scanning directories {",".join(f"{s}/" for s in scan_paths)}...')
    for added in from_obj.diff(to_obj, paths=scan_paths).iter_change_type("A"):
        filename = added.a_path
        if not args.skip:
            print(f"checking licenses for {filename}... ", end="", flush=True)
        try:
            # Diff paths are relative to the repository top level.
            full_path = (repo_dir / filename).as_posix()
            lic = get_licenses_for_file(full_path)
            if len(lic["license_clues"]) > 1:
                # In skip mode no "checking ..." prefix was printed, so
                # name the file here; otherwise the message is anonymous.
                skip_print_file(args.skip, filename)
                print("multiple licenses detected")
            else:
                spdx = lic["detected_license_expression_spdx"]
                if not args.skip or spdx not in COMMON_LICENSES:
                    skip_print_file(args.skip, filename)
                    print(f"{spdx}")
        except OSError:
            # Likely hit a file that was added to the repo and subsequently
            # removed.
            skip_print_file(args.skip, filename)
            print("no longer in repo?")
        except KeyboardInterrupt:
            # Stop scanning on Ctrl-C, but exit normally.
            print("interrupted")
            break
        except Exception as e:
            # If scanning fails, there is little we can do but print an error.
            skip_print_file(args.skip, filename)
            print(e)
|
||||
|
||||
|
||||
# Run the scan only when executed as a script, handing main() the full
# argument vector (program name included).
if __name__ == "__main__":
    main(sys.argv)
|
||||
Reference in New Issue
Block a user