blob: c44b378a6f40b613d1f02db19ade873f638c736a [file] [log] [blame]
Leonardo Sandoval314eed82020-08-05 13:32:04 -05001#!/usr/bin/env python3
2#
Xinyu Zhang235d5ae2021-02-07 10:42:38 +08003# Copyright (c) 2019-2021, Arm Limited. All rights reserved.
Leonardo Sandoval314eed82020-08-05 13:32:04 -05004#
5# SPDX-License-Identifier: BSD-3-Clause
6#
7
8"""
9Check if a given file includes the copyright boiler plate.
10This checker supports the following comment styles:
11 /*
12 *
13 //
14 #
15"""
16
17import argparse
18import datetime
19import collections
20import fnmatch
21import shlex
22import os
23import re
24import sys
25import utils
26from itertools import islice
27
# File name suffixes to check. 'Makefile' is listed as a whole file name
# rather than a dotted extension.
VALID_FILE_EXTENSIONS = (
    '.c',
    '.conf',
    '.dts',
    '.dtsi',
    '.editorconfig',
    '.h',
    '.i',
    '.ld',
    'Makefile',
    '.mk',
    '.msvc',
    '.py',
    '.S',
    '.scat',
    '.sh',
)
32
# Paths inside the tree to ignore. Hidden folders and files are always ignored.
# Entries must not end in '/'.
IGNORED_FOLDERS = (
    'bl2/ext',
    'docs',
    'interface/include/mbedtls',
    'lib',
    'platform/ext',
    'tools',
)
43
# Individual files to ignore even though their folder is not ignored.
# Paths are written out in full so they stay easy to grep for.
IGNORED_FILES = (
    'interface/include/psa/build_info.h',
    'interface/include/psa/crypto.h',
    'interface/include/psa/crypto_adjust_auto_enabled.h',
    'interface/include/psa/crypto_adjust_config_key_pair_types.h',
    'interface/include/psa/crypto_adjust_config_synonyms.h',
    'interface/include/psa/crypto_builtin_composites.h',
    'interface/include/psa/crypto_builtin_key_derivation.h',
    'interface/include/psa/crypto_builtin_primitives.h',
    'interface/include/psa/crypto_compat.h',
    'interface/include/psa/crypto_driver_common.h',
    'interface/include/psa/crypto_driver_contexts_composites.h',
    'interface/include/psa/crypto_driver_contexts_key_derivation.h',
    'interface/include/psa/crypto_driver_contexts_primitives.h',
    'interface/include/psa/crypto_extra.h',
    'interface/include/psa/crypto_legacy.h',
    'interface/include/psa/crypto_platform.h',
    'interface/include/psa/crypto_se_driver.h',
    'interface/include/psa/crypto_sizes.h',
    'interface/include/psa/crypto_struct.h',
    'interface/include/psa/crypto_types.h',
    'interface/include/psa/crypto_values.h',
)
Leonardo Sandoval314eed82020-08-05 13:32:04 -050068
# Supported comment styles (Python regex). Raw strings are used for every
# pattern containing a backslash so that sequences such as '\*' and '\#' are
# not treated as (invalid) string escapes by the Python parser.
COMMENT_PATTERN = r'(\*|/\*|\#|//)'

# Any combination of spaces and/or tabs. Deliberately NOT a raw string: the
# class contains a literal tab character, which keeps the text printed in
# verbose mode unchanged.
SPACING = '[ \t]*'

# Line must start with a comment and optional spacing
LINE_START = '^' + SPACING + COMMENT_PATTERN + SPACING

# Line end with optional spacing
EOL = SPACING + '$'

# Year or period as YYYY or YYYY-YYYY, or nothing as per the
# Linux Foundation copyright notice recommendation
TIME_PERIOD = '([0-9]{4}(-[0-9]{4})?)?'

# Any string with a valid license ID. A suffix glued directly onto the ID
# (e.g. "BSD-3-Clause-Foo") is rejected: the ID must be followed by the end of
# the line or by one of the separator characters ' ', ',', '.', ')' or ';'.
LICENSE_ID = r'.*(BSD-3-Clause|BSD-2-Clause-FreeBSD)([ ,.\);].*)?'

# File must contain both lines to pass the check
COPYRIGHT_LINE = LINE_START + 'Copyright' + '.*' + TIME_PERIOD + '.*' + EOL
LICENSE_ID_LINE = LINE_START + 'SPDX-License-Identifier:' + LICENSE_ID + EOL

# Compiled license patterns. MULTILINE makes '^' and '$' match at every line
# boundary, so the patterns can be searched against a whole file at once.
COPYRIGHT_PATTERN = re.compile(COPYRIGHT_LINE, re.MULTILINE)
LICENSE_ID_PATTERN = re.compile(LICENSE_ID_LINE, re.MULTILINE)

CURRENT_YEAR = str(datetime.datetime.now().year)

# Status codes returned by the checker functions
COPYRIGHT_OK = 0
COPYRIGHT_ERROR = 1
100
def check_copyright(path, args, encoding='utf-8'):
    '''Check a single file for a copyright line and a valid license ID line.

    The whole file is searched with COPYRIGHT_PATTERN and LICENSE_ID_PATTERN;
    one error message is printed for each pattern that is not found.

    path     -- path of the file to check
    args     -- parsed command line arguments (not used by this function;
                kept so existing callers keep working)
    encoding -- text encoding used to decode the file

    Returns COPYRIGHT_OK when both lines are present, COPYRIGHT_ERROR when at
    least one of them is missing or when the file cannot be read.
    '''

    try:
        # Bytes that are not valid in the given encoding are replaced instead
        # of raising, so that a stray byte somewhere in the file cannot abort
        # the whole run; both patterns only look for plain ASCII text.
        with open(path, encoding=encoding, errors='replace') as file_:
            file_content = file_.read()
    except OSError as err:
        # E.g. a listed file that does not exist in the working tree.
        print("ERROR: Cannot read {}: {}".format(path, err))
        return COPYRIGHT_ERROR

    result = COPYRIGHT_OK

    if not COPYRIGHT_PATTERN.search(file_content):
        print("ERROR: Missing copyright in {}".format(path))
        result = COPYRIGHT_ERROR

    if not LICENSE_ID_PATTERN.search(file_content):
        print("ERROR: License ID error in {}".format(path))
        result = COPYRIGHT_ERROR

    return result
119
def main(args):
    '''Check the copyright header of every relevant file and print a summary.

    In patch mode (args.patch) the candidate files are those reported by
    'git diff --name-only' between args.from_ref and args.to_ref; otherwise
    they are all files reported by 'git ls-files'. Git is run through
    utils.shell_command without an explicit directory, so the caller is
    expected to have changed into the source tree already. Candidates for
    which utils.file_is_ignored() returns a true value are skipped.

    args -- parsed command line arguments; the attributes read here are
            verbose, patch, from_ref and to_ref

    Returns COPYRIGHT_OK when no checked file has an error, COPYRIGHT_ERROR
    when at least one file fails or the git command itself fails.
    '''
    print("Checking the copyrights in the code...")

    if args.verbose:
        print("Copyright regexp: " + COPYRIGHT_LINE)
        print("License regexp: " + LICENSE_ID_LINE)

    if args.patch:
        print("Checking files modified between patches " + args.from_ref
              + " and " + args.to_ref + "...")
        # ACMRT = Added, Copied, Modified, Renamed, Type-changed; deleted
        # files are left out of the listing.
        git_cmd = ['git', 'diff', '--diff-filter=ACMRT', '--name-only',
                   args.from_ref, args.to_ref]
    else:
        print("Checking all files tracked by git...")
        git_cmd = ['git', 'ls-files']

    (rc, stdout, stderr) = utils.shell_command(git_cmd)
    if rc:
        # Say why the run is aborted instead of failing silently.
        print("ERROR: '{}' failed with exit code {}".format(
            ' '.join(git_cmd), rc))
        if stderr:
            print(stderr)
        return COPYRIGHT_ERROR

    files = stdout.splitlines()

    count_ok = 0
    count_error = 0

    for f in files:

        if utils.file_is_ignored(f, VALID_FILE_EXTENSIONS, IGNORED_FILES,
                                 IGNORED_FOLDERS):
            if args.verbose:
                print("Ignoring file " + f)
            continue

        if args.verbose:
            print("Checking file " + f)

        rc = check_copyright(f, args)

        if rc == COPYRIGHT_OK:
            count_ok += 1
        elif rc == COPYRIGHT_ERROR:
            count_error += 1

    print("\nSummary:")
    print("\t{} files analyzed".format(count_ok + count_error))

    if count_error == 0:
        print("\tNo errors found")
        return COPYRIGHT_OK

    print("\t{} errors found".format(count_error))
    return COPYRIGHT_ERROR
177
def _default_from_ref(tree):
    '''Return the merge base of HEAD and origin/master for the repo at tree.

    Git is invoked with '-C tree' so the lookup happens in the tree being
    checked rather than in the directory the script was started from.
    Prints a message and exits the process if no base commit can be found.
    '''
    (rc, stdout, _) = utils.shell_command(
        ['git', '-C', tree, 'merge-base', 'HEAD', 'origin/master'])
    if rc:
        print("Git merge-base command failed. Cannot determine base commit.")
        sys.exit(rc)

    merge_bases = stdout.splitlines()
    if not merge_bases:
        print("Git merge-base returned no commit. Cannot determine base commit.")
        sys.exit(1)

    # This should not happen, but it's better to be safe.
    if len(merge_bases) > 1:
        print("WARNING: Multiple merge bases found. Using the first one as base commit.")

    return merge_bases[0]


def parse_cmd_line(argv, prog_name):
    '''Parse the command line and return the argparse namespace.

    argv      -- list of command line arguments, without the program name
    prog_name -- program name shown in usage and help output

    The returned namespace has the attributes tree, verbose, patch, from_ref
    and to_ref. When --from-ref is not given, from_ref is resolved to the
    merge base of HEAD and origin/master, but only in patch mode: that is the
    only mode that needs it, and it keeps --help and full-tree checks working
    in checkouts without an origin/master ref. Outside patch mode from_ref is
    None unless it was given explicitly.

    Exits the process (via argparse or sys.exit) on invalid arguments or when
    the default base commit is needed but cannot be determined.
    '''
    parser = argparse.ArgumentParser(
        prog=prog_name,
        formatter_class=argparse.RawTextHelpFormatter,
        description="Check copyright of all files of codebase",
        epilog="""
For each source file in the tree, checks that the copyright header
has the correct format.
""")

    parser.add_argument("--tree", "-t",
        help="Path to the source tree to check (default: %(default)s)",
        default=os.curdir)

    parser.add_argument("--verbose", "-v",
        help="Increase verbosity (default: %(default)s)",
        action='store_true', default=False)

    parser.add_argument("--patch", "-p",
        help="""
Patch mode.
Instead of checking all files in the source tree, the script will consider
only files that are modified by the latest patch(es).""",
        action="store_true")

    parser.add_argument("--from-ref",
        help="Base commit in patch mode "
             "(default: merge base of HEAD and origin/master)",
        default=None)
    parser.add_argument("--to-ref",
        help="Final commit in patch mode (default: %(default)s)",
        default="HEAD")

    args = parser.parse_args(argv)

    # Resolve the default base commit lazily, once the tree is known.
    if args.patch and args.from_ref is None:
        args.from_ref = _default_from_ref(args.tree)

    return args
222
223
if __name__ == "__main__":
    # Parse the options first: the tree to check comes from the command line.
    args = parse_cmd_line(sys.argv[1:], sys.argv[0])

    # Change into the tree before main() runs its checks.
    os.chdir(args.tree)

    # The checker's status code becomes the process exit code.
    sys.exit(main(args))
231 sys.exit(rc)