Okash Khawaja | 671bed2 | 2022-11-10 15:51:19 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | # |
| 3 | # Copyright (c) 2022 Google LLC. All rights reserved. |
| 4 | # |
| 5 | # SPDX-License-Identifier: BSD-3-Clause |
| 6 | |
| 7 | # quick hacky script to check patches if they are candidates for lts. it checks |
| 8 | # only the non-merge commits. |
| 9 | |
| 10 | import pkg_resources |
| 11 | import os |
| 12 | import git |
| 13 | import re |
| 14 | import argparse |
| 15 | from io import StringIO |
| 16 | pkg_resources.require("unidiff>=0.7.4") |
| 17 | from unidiff import PatchSet |
| 18 | |
# Module-wide switch for debug logging; main() overwrites it from --debug.
global_debug = False


def debug_print(*args, **kwargs):
    """Print like the built-in print(), but only when debug logging is on.

    All positional and keyword arguments are forwarded unchanged to print().
    Nothing is written unless the module-level ``global_debug`` flag is truthy.
    """
    # Only reading the module-level flag, so no ``global`` declaration is
    # needed (the previous one named a variable that does not exist).
    if global_debug:
        print(*args, **kwargs)
| 24 | |
def contains_re(pf, tok):
    """Tell whether any changed line of a patched file matches a regex.

    pf  -- iterable of hunks, each hunk an iterable of line objects that
           expose ``is_context`` and ``value``
    tok -- compiled regular expression, applied with ``search``

    Context lines are ignored, so only added or removed lines are tested.
    Each line's text is stripped of surrounding whitespace before matching.
    Returns True on the first match, False if nothing matches.
    """
    changed_lines = (
        line
        for hunk in pf
        for line in hunk
        if not line.is_context
    )
    return any(
        tok.search(line.value.strip()) is not None
        for line in changed_lines
    )
| 36 | |
def process_ps(ps):
    """Score a parsed patch set by the errata-related changes it contains.

    ps -- iterable of patched-file objects (as produced by unidiff's
          PatchSet) exposing ``is_binary_file``, ``is_modified_file`` and
          ``path``, and iterable as hunks of lines.

    Files flagged as binary, or not flagged as modified, are skipped. For
    every remaining file the score is incremented by one when:
      * its path matches CPU_PATH_TOKEN and one of its added/removed lines
        matches CPU_ERRATA_TOKEN;
      * its path matches DOC_PATH_TOKEN and one of its added/removed lines
        matches DOC_ERRATA_TOKEN.

    Returns the accumulated integer score (0 when nothing matched).
    """
    # Path patterns and content patterns are kept in separate variables.
    # Reusing one name for both made every file after the first path match
    # get its *path* tested against the errata-line regex.
    cpu_path_re = re.compile(CPU_PATH_TOKEN)
    cpu_errata_re = re.compile(CPU_ERRATA_TOKEN)
    doc_path_re = re.compile(DOC_PATH_TOKEN)
    doc_errata_re = re.compile(DOC_ERRATA_TOKEN)

    score = 0

    for pf in ps:
        if pf.is_binary_file or not pf.is_modified_file:
            continue

        if cpu_path_re.search(pf.path) is not None:
            debug_print("* change found in cpu path:", pf.path)
            if contains_re(pf, cpu_errata_re):
                score += 1
                debug_print(" found", CPU_ERRATA_TOKEN)

        if doc_path_re.search(pf.path) is not None:
            debug_print("* change found in macros doc path:", pf.path)
            if contains_re(pf, doc_errata_re):
                score += 1
                debug_print(" found", DOC_ERRATA_TOKEN)

    return score
| 61 | |
# Regular expressions used for scoring. They are applied with re.search to
# the commit subject, to changed file paths, and to the stripped text of
# added/removed lines respectively.
SUBJECT_TOKENS = r'fix\(cpus\)|revert\(cpus\)|fix\(errata\)|\(security\)'
CPU_PATH_TOKEN = r'lib/cpus/aarch(32|64)/.*\.S'
CPU_ERRATA_TOKEN = r'^report_errata ERRATA_'
# The '.' before "rst" is escaped so that it only matches a literal dot.
DOC_PATH_TOKEN = r'docs/design/cpu-specific-build-macros\.rst'
DOC_ERRATA_TOKEN = r'^-\s*``ERRATA_'
# REBASE_DEPTH is the number of commits from the tip of the integration branch
# that we need to check to find the commit that the current patch set is based on.
REBASE_DEPTH = 50
# MAX_PATCHSET_DEPTH is the maximum number of patches that we expect in the current
# patch set. For each commit in the patch set we look at the past REBASE_DEPTH commits
# of the integration branch. If there is a match, we know the current patch set was
# based off of that matching commit. This is not necessarily the optimal method but I'm
# not familiar with the gerrit API. If there is a better way to do this, we should
# implement that.
MAX_PATCHSET_DEPTH = 50
# Revision names handed to repo.iter_commits(): the reference branch and the
# branch holding the patch set under review.
CHECK_AGAINST = 'integration'
TO_CHECK = 'to_check'
| 78 | |
Okash Khawaja | 671bed2 | 2022-11-10 15:51:19 +0000 | [diff] [blame] | 79 | |
| 80 | ## TODO: for case like 921081049ec3 where we need to refactor first for security |
| 81 | # patch to be applied then we should: |
| 82 | # 1. find the security patch |
| 83 | # 2. from that patch find CVE number if any |
| 84 | # 3. look for all patches that contain that CVE number in commit message |
| 85 | |
| 86 | ## TODO: similar to errata macros and rst file additions, we have CVE macros and rst file |
| 87 | # additions. so we can use similar logic for that. |
| 88 | |
| 89 | ## TODO: for security we should look for CVE number regex match and if found flag it |
def main():
    """Command-line entry point: print a triage score for each new commit.

    Options:
      --repo   path to the git repository to inspect (required)
      --debug  enable debug_print() output

    The newest REBASE_DEPTH commit hashes of CHECK_AGAINST are collected
    first. TO_CHECK is then walked from its tip; the walk stops at the first
    commit whose hash is in that collection, or after MAX_PATCHSET_DEPTH
    non-merge commits have been scored. Merge commits are skipped and do not
    count towards that limit.

    For each scored commit one line ``<sha>: <score>`` is printed, where the
    score is 1 if the subject matches SUBJECT_TOKENS, plus whatever
    process_ps() returns for the commit's diff against its first parent.
    """
    global global_debug

    parser = argparse.ArgumentParser(prog="lts-triage.py", description="check patches for LTS candidacy")
    parser.add_argument("--repo", required=True, help="path to tf-a git repo")
    parser.add_argument("--debug", help="print debug logs", action="store_true")

    args = parser.parse_args()
    global_debug = args.debug

    repo = git.Repo(args.repo)

    # Hashes of the most recent integration commits. A set gives constant-time
    # membership tests in the loop below; max_count bounds the walk.
    rebase_hashes = {
        cmt.hexsha
        for cmt in repo.iter_commits(CHECK_AGAINST, max_count=REBASE_DEPTH)
    }

    # Loop-invariant, so compile once instead of once per commit.
    subject_re = re.compile(SUBJECT_TOKENS)

    scored = 0
    for cmt in repo.iter_commits(TO_CHECK):
        # If we find a commit hash that is also among the ones collected from
        # the integration branch, we have seen all the new patches in this
        # patch set, so stop.
        if cmt.hexsha in rebase_hashes:
            debug_print("## stopping because found sha1 common between the two branches: ", cmt.hexsha)
            break

        # don't process merge commits
        if len(cmt.parents) > 1:
            continue

        score = 0
        if subject_re.search(cmt.summary) is not None:
            debug_print("## subject match")
            score += 1

        # NOTE(review): "<sha>~1" does not exist for a parentless (root)
        # commit, so git would fail here if the walk ever got that far.
        diff_text = repo.git.diff(cmt.hexsha + "~1", cmt.hexsha, ignore_blank_lines=True, ignore_space_at_eol=True)
        ps = PatchSet(StringIO(diff_text))
        debug_print("# score before process_ps:", score)
        score += process_ps(ps)
        debug_print("# score after process_ps:", score)

        print("{}: {}".format(cmt.hexsha, score))

        # Only commits that were actually scored count towards the limit.
        scored += 1
        if scored == MAX_PATCHSET_DEPTH:
            break
| 138 | |
# Run the triage only when executed as a script, not when imported.
if __name__ == '__main__':
    main()