summaryrefslogtreecommitdiff
path: root/script/static-checks/check-copyright.py
blob: c44b378a6f40b613d1f02db19ade873f638c736a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#!/usr/bin/env python3
#
# Copyright (c) 2019-2021, Arm Limited. All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#

"""
Check if a given file includes the copyright boiler plate.
This checker supports the following comment styles:
    /*
    *
    //
    #
"""

import argparse
import datetime
import collections
import fnmatch
import shlex
import os
import re
import sys
import utils
from itertools import islice

# File name endings to check. 'Makefile' has no leading dot because it names
# a whole file rather than an extension.
VALID_FILE_EXTENSIONS = ('.c', '.conf', '.dts', '.dtsi', '.editorconfig',
                         '.h', '.i', '.ld', 'Makefile', '.mk', '.msvc',
                         '.py', '.S', '.scat', '.sh')

# Paths inside the tree to ignore. Hidden folders and files are always ignored.
# They mustn't end in '/'.
IGNORED_FOLDERS = (
    'bl2/ext',
    'docs',
    'interface/include/mbedtls',
    'lib',
    'platform/ext',
    'tools'
)

# List of ignored files in folders that aren't ignored
IGNORED_FILES = (
    'interface/include/psa/build_info.h',
    'interface/include/psa/crypto.h',
    'interface/include/psa/crypto_adjust_auto_enabled.h',
    'interface/include/psa/crypto_adjust_config_key_pair_types.h',
    'interface/include/psa/crypto_adjust_config_synonyms.h',
    'interface/include/psa/crypto_builtin_composites.h',
    'interface/include/psa/crypto_builtin_key_derivation.h',
    'interface/include/psa/crypto_builtin_primitives.h',
    'interface/include/psa/crypto_compat.h',
    'interface/include/psa/crypto_driver_common.h',
    'interface/include/psa/crypto_driver_contexts_composites.h',
    'interface/include/psa/crypto_driver_contexts_key_derivation.h',
    'interface/include/psa/crypto_driver_contexts_primitives.h',
    'interface/include/psa/crypto_extra.h',
    'interface/include/psa/crypto_legacy.h',
    'interface/include/psa/crypto_platform.h',
    'interface/include/psa/crypto_se_driver.h',
    'interface/include/psa/crypto_sizes.h',
    'interface/include/psa/crypto_struct.h',
    'interface/include/psa/crypto_types.h',
    'interface/include/psa/crypto_values.h'
)

# NOTE: every regex fragment below that contains a backslash is a raw string,
# so that the backslash reaches the 're' module untouched instead of being
# treated as an (invalid) string escape by the Python parser.

# Supported comment styles (Python regex): '*', '/*', '#' and '//'
COMMENT_PATTERN = r'(\*|/\*|\#|//)'

# Any combination of spaces and/or tabs
SPACING = '[ \t]*'

# Line must start with a comment and optional spacing
LINE_START = '^' + SPACING + COMMENT_PATTERN + SPACING

# Line end with optional spacing
EOL = SPACING + '$'

# Year or period as YYYY or YYYY-YYYY, or nothing as per the
# Linux Foundation copyright notice recommendation
TIME_PERIOD = '([0-9]{4}(-[0-9]{4})?)?'

# Any string with a valid license ID. The ID must be followed either by the
# end of the line or by one of ' ', ',', '.', ')' or ';', so that an ID with
# a postfix appended (e.g. 'BSD-3-Clause-Foo') is rejected.
LICENSE_ID = r'.*(BSD-3-Clause|BSD-2-Clause-FreeBSD)([ ,.\);].*)?'

# File must contain both lines to pass the check
COPYRIGHT_LINE = LINE_START + 'Copyright' + '.*' + TIME_PERIOD + '.*' + EOL
LICENSE_ID_LINE = LINE_START + 'SPDX-License-Identifier:' + LICENSE_ID + EOL

# Compiled license patterns. re.MULTILINE makes '^' and '$' match at every
# line of the file content rather than only at its very start and end.
COPYRIGHT_PATTERN = re.compile(COPYRIGHT_LINE, re.MULTILINE)
LICENSE_ID_PATTERN = re.compile(LICENSE_ID_LINE, re.MULTILINE)

CURRENT_YEAR = str(datetime.datetime.now().year)

# Result codes returned by check_copyright() and main()
COPYRIGHT_OK = 0
COPYRIGHT_ERROR = 1

def check_copyright(path, args, encoding='utf-8'):
    '''Check that a file carries both a copyright line and a license ID line.

    The whole file is read and searched with COPYRIGHT_PATTERN and
    LICENSE_ID_PATTERN. One error message is printed for each pattern that
    has no match.

    path:     file to open and check.
    args:     parsed command line options (not used by this function).
    encoding: text encoding used to read the file.

    Returns COPYRIGHT_OK when both patterns match, COPYRIGHT_ERROR otherwise.
    '''

    with open(path, encoding=encoding) as source:
        reported_name = source.name
        text = source.read()

    has_copyright = COPYRIGHT_PATTERN.search(text) is not None
    has_license_id = LICENSE_ID_PATTERN.search(text) is not None

    # Both problems are reported, not just the first one found.
    if not has_copyright:
        print("ERROR: Missing copyright in " + reported_name)
    if not has_license_id:
        print("ERROR: License ID error in " + reported_name)

    if has_copyright and has_license_id:
        return COPYRIGHT_OK
    return COPYRIGHT_ERROR

def main(args):
    '''Check the copyright header of the selected files and print a summary.

    In patch mode (args.patch) the files are those listed by
    "git diff --diff-filter=ACMRT --name-only" between args.from_ref and
    args.to_ref; otherwise they are all the files listed by "git ls-files".
    Files for which utils.file_is_ignored() is true are skipped.

    args: parsed command line options; reads verbose, patch, from_ref and
          to_ref.

    Returns COPYRIGHT_OK when no checked file has an error. Returns
    COPYRIGHT_ERROR when at least one has, or when the git command that
    lists the files fails.
    '''
    print("Checking the copyrights in the code...")

    if args.verbose:
        print("Copyright regexp: " + COPYRIGHT_LINE)
        print("License regexp: " + LICENSE_ID_LINE)

    if args.patch:
        print("Checking files modified between patches " + args.from_ref
              + " and " + args.to_ref + "...")
        git_cmd = ['git', 'diff', '--diff-filter=ACMRT', '--name-only',
                   args.from_ref, args.to_ref]
    else:
        print("Checking all files tracked by git...")
        git_cmd = ['git', 'ls-files']

    (rc, stdout, _) = utils.shell_command(git_cmd)
    if rc:
        # Say why the check is aborted instead of failing silently.
        print("ERROR: '" + " ".join(git_cmd)
              + "' failed. Cannot determine the files to check.")
        return COPYRIGHT_ERROR

    count_ok = 0
    count_error = 0

    for f in stdout.splitlines():

        if utils.file_is_ignored(f, VALID_FILE_EXTENSIONS, IGNORED_FILES,
                                 IGNORED_FOLDERS):
            if args.verbose:
                print("Ignoring file " + f)
            continue

        if args.verbose:
            print("Checking file " + f)

        rc = check_copyright(f, args)

        if rc == COPYRIGHT_OK:
            count_ok += 1
        elif rc == COPYRIGHT_ERROR:
            count_error += 1

    print("\nSummary:")
    print("\t{} files analyzed".format(count_ok + count_error))

    if count_error == 0:
        print("\tNo errors found")
        return COPYRIGHT_OK

    print("\t{} errors found".format(count_error))
    return COPYRIGHT_ERROR

def _find_merge_base():
    '''Return the merge base of HEAD and origin/master as reported by git.

    Exits the script when git fails or reports no commit at all.
    '''
    (rc, stdout, _) = utils.shell_command(['git', 'merge-base', 'HEAD', 'origin/master'])
    if rc:
        print("Git merge-base command failed. Cannot determine base commit.")
        sys.exit(rc)
    merge_bases = stdout.splitlines()

    if not merge_bases:
        print("Git merge-base returned no commit. Cannot determine base commit.")
        sys.exit(1)

    # This should not happen, but it's better to be safe.
    if len(merge_bases) > 1:
        print("WARNING: Multiple merge bases found. Using the first one as base commit.")

    return merge_bases[0]


def parse_cmd_line(argv, prog_name):
    '''Parse the command line of the copyright checker.

    argv:      list of arguments, without the program name.
    prog_name: program name shown in the usage and help messages.

    Returns the argparse namespace with the attributes tree, verbose, patch,
    from_ref and to_ref. from_ref is only resolved to the merge base of HEAD
    and origin/master when patch mode is requested without an explicit
    --from-ref; otherwise it is the value given by the user, or None when
    not in patch mode. This way a missing origin/master cannot break the
    modes that never need it.

    Exits the script (through argparse or sys.exit) on invalid arguments or
    when the merge base is needed but cannot be determined.
    '''
    parser = argparse.ArgumentParser(
        prog=prog_name,
        formatter_class=argparse.RawTextHelpFormatter,
        description="Check copyright of all files of codebase",
        epilog="""
For each source file in the tree, checks that the copyright header
has the correct format.
""")

    parser.add_argument("--tree", "-t",
                        help="Path to the source tree to check (default: %(default)s)",
                        default=os.curdir)

    parser.add_argument("--verbose", "-v",
                        help="Increase verbosity (default: %(default)s)",
                        action='store_true', default=False)

    parser.add_argument("--patch", "-p",
                        help="""
Patch mode.
Instead of checking all files in the source tree, the script will consider
only files that are modified by the latest patch(es).""",
                        action="store_true")

    parser.add_argument("--from-ref",
                        help="Base commit in patch mode "
                             "(default: merge base of HEAD and origin/master)",
                        default=None)
    parser.add_argument("--to-ref",
                        help="Final commit in patch mode (default: %(default)s)",
                        default="HEAD")

    args = parser.parse_args(argv)

    # Only ask git for the merge base when it is really going to be used.
    # NOTE(review): the script changes to --tree only after this function
    # returns, so git runs in the caller's working directory - confirm that
    # this is intended.
    if args.patch and args.from_ref is None:
        args.from_ref = _find_merge_base()

    return args


if __name__ == "__main__":
    # Script entry point: parse the options, move into the requested tree,
    # run the check and use its result as the process exit status.
    cli_args = parse_cmd_line(sys.argv[1:], sys.argv[0])

    # Switch to the tree given by --tree before main() runs.
    os.chdir(cli_args.tree)

    sys.exit(main(cli_args))