gdb: fix bugs in gdb/copyright.py, make it use glob patterns

gdb/copyright.py currently changes some files that it shouldn't:

 - despite having a `gnulib/import` entry in EXCLUDE_LIST, it does
   change the files under that directory
 - it is missing an entry for `sim/Makefile.in`, so that file gets
   changed too

Change the exclude list logic to use glob patterns, matched against each
path with `pathlib.PurePath.full_match` (available since Python 3.13).
This makes it easier to specify exclusions of full directories or files
by basename, while simplifying the code.

Merge EXCLUDE_LIST and NOT_FSF_LIST, since there's no fundamental reason
to keep them separate (they are treated identically).  I kept the
comment that explains that some files are excluded due to not being
FSF-licensed.

Merge EXCLUDE_ALL_LIST into EXCLUDE_LIST, converting the entries to glob
patterns that match everywhere in the tree (e.g. `**/configure`).

Tested by running the script on the parent commit of d01e823438
("Update copyright dates to include 2025") and diff'ing the result with
d01e823438.  The only differences are:

 - the files that we don't want to modify (gnulib/import and
   sim/Makefile.in)
 - the files that need to be modified by hand

Running the script on latest master produces no diff.

Change-Id: I318dc3bff34e4b3a9b66ea305d0c3872f69cd072
Reviewed-By: Guinevere Larsen <guinevere@redhat.com>
This commit is contained in:
Simon Marchi
2025-04-11 11:08:06 -04:00
parent 2c00b2fbb8
commit fc0747d0b1

View File

@@ -36,6 +36,7 @@ import argparse
import locale
import os
import os.path
import pathlib
import subprocess
import sys
from typing import Iterable
@@ -68,19 +69,15 @@ def get_update_list():
.split("\0")
)
full_exclude_list = EXCLUDE_LIST + BY_HAND
def include_file(filename: str):
(dirname, basename) = os.path.split(filename)
dirbasename = os.path.basename(dirname)
return not (
basename in EXCLUDE_ALL_LIST
or dirbasename in EXCLUDE_ALL_LIST
or dirname in EXCLUDE_LIST
or dirname in NOT_FSF_LIST
or dirname in BY_HAND
or filename in EXCLUDE_LIST
or filename in NOT_FSF_LIST
or filename in BY_HAND
)
path = pathlib.Path(filename)
for pattern in full_exclude_list:
if path.full_match(pattern):
return False
return True
return filter(include_file, result)
@@ -212,8 +209,14 @@ def main(argv: list[str]) -> int | None:
# generated, non-FSF, or otherwise special (e.g. license text,
# or test cases which must be sensitive to line numbering).
#
# Filenames are relative to the root directory.
# Entries are treated as glob patterns.
EXCLUDE_LIST = (
"**/aclocal.m4",
"**/configure",
"**/COPYING.LIB",
"**/COPYING",
"**/fdl.texi",
"**/gpl.texi",
"gdb/copying.c",
"gdb/nat/glibc_thread_db.h",
"gdb/CONTRIBUTE",
@@ -221,45 +224,11 @@ EXCLUDE_LIST = (
"gdbsupport/unordered_dense.h",
"gnulib/doc/gendocs_template",
"gnulib/doc/gendocs_template_min",
"gnulib/import",
"gnulib/import/**",
"gnulib/config.in",
"gnulib/Makefile.in",
)
# Files which should not be modified, either because they are
# generated, non-FSF, or otherwise special (e.g. license text,
# or test cases which must be sensitive to line numbering).
#
# Matches any file or directory name anywhere. Use with caution.
# This is mostly for files that can be found in multiple directories.
# E.g.: we want all files named COPYING to be left untouched.
EXCLUDE_ALL_LIST = (
"COPYING",
"COPYING.LIB",
"configure",
"fdl.texi",
"gpl.texi",
"aclocal.m4",
)
# The list of files to update by hand.
BY_HAND: tuple[str, ...] = (
# Nothing at the moment :-).
)
# Files containing multiple copyright headers. This script is only
# fixing the first one it finds, so we need to finish the update
# by hand.
MULTIPLE_COPYRIGHT_HEADERS = (
"gdb/doc/gdb.texinfo",
"gdb/doc/refcard.tex",
"gdb/syscalls/update-netbsd.sh",
)
# The list of files which have a copyright, but not held by the FSF.
# Filenames are relative to the root directory.
NOT_FSF_LIST = (
"sim/Makefile.in",
# The files below have a copyright, but not held by the FSF.
"gdb/exc_request.defs",
"gdb/gdbtk",
"gdb/testsuite/gdb.gdbtk/",
@@ -296,9 +265,27 @@ NOT_FSF_LIST = (
"sim/mips/sim-main.c",
"sim/moxie/moxie-gdb.dts",
# Not a single file in sim/ppc/ appears to be copyright FSF :-(.
"sim/ppc",
"sim/ppc/**",
"sim/testsuite/mips/mips32-dsp2.s",
)
# The list of files to update by hand.
#
# Entries are treated as glob patterns.
BY_HAND: tuple[str, ...] = (
# Nothing at the moment :-).
)
# Files containing multiple copyright headers. This script is only
# fixing the first one it finds, so we need to finish the update
# by hand.
#
# Entries are treated as glob patterns.
MULTIPLE_COPYRIGHT_HEADERS = (
"gdb/doc/gdb.texinfo",
"gdb/doc/refcard.tex",
"gdb/syscalls/update-netbsd.sh",
)
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))