ci(lts): Add lts-triage-v2.py

lts-triage-v2.py checks the integration branch commits
against the lts branch. If any commit has a non-zero score,
a CSV report will be generated and will be sent to the maintainers.
Please check the README. [1]

[1]: https://linaro.atlassian.net/browse/TFC-510

Signed-off-by: Arthur She <arthur.she@linaro.org>
Change-Id: If4771e18e0cb1f76ebb09fb61188b5c763a942d7
diff --git a/lts/README b/lts/README
index 4e9e670..d069a58 100644
--- a/lts/README
+++ b/lts/README
@@ -61,3 +61,80 @@
 chore(cpus): remove redundant asserts:    0
 refactor(cpus): shorten errata flag defines:    0
 ## stopping because found sha1 common between the two branches:  1678bbb57249b6edd4cdebc3cbc33b04df8fa098
+
+================
+lts-triage-v2.py
+================
+
+Purpose of this script is to check a patch and determine if that patch qualifies
+as a candidate for one of the LTS branches.
+
+Currently it focuses on cpu errata while it contains basic support for security
+patches. It computes a crude score:
+
+1 point if subject line matches tokens for cpu errata or security
+1 point for "report_errata ERRATA" in a lib/cpus/aarch{32,64}/*.S file
+1 point for "- ``ERRATA_" in docs/design/cpu-specific-build-macros.rst file
+
+The match tokens (regular expressions) are defined in config.py.
+Note that the script only looks at commits which are not merge commits.
+
+The script expects two branches: 'integration' and the lts branch, for example 'lts-v2.8'.
+It checks every commit in the integration branch until it finds a common commit
+(same Change-Id) between the integration branch and the lts branch.
+
+At the end, if any commit has a non-zero score, the script creates a CSV file at
+the path provided by the --csv_path argument.
+The CSV report includes the following information:
+1. index
+2. commit id in the integration branch
+3. commit summary
+4. score
+5. Gerrit Change-Id
+6. patch link for the LTS branch: if the patch has been cherry-picked to the lts branch,
+                                  the Gerrit URL will be listed
+7. patch link for the integration branch
+
+Running it:
+-----------
+
+In order to perform "gerrit query", we need a Gerrit account and the corresponding SSH keyfile.
+Here's help output for convenience:
+
+$ python lts-triage-v2.py -h
+usage: lts-triage.py [-h] --repo REPO --csv_path CSV_PATH --lts LTS --gerrit_user GERRIT_USER
+       --ssh_keyfile SSH_KEYFILE [--debug]
+
+check patches for LTS candidacy
+
+options:
+  -h, --help            show this help message and exit
+  --repo REPO           path to tf-a git repo
+  --csv_path CSV_PATH   path including the filename for CSV file
+  --lts LTS             LTS branch, ex. lts-v2.8
+  --gerrit_user GERRIT_USER
+                        The user id to perform the Gerrit query
+  --ssh_keyfile SSH_KEYFILE
+                        The SSH keyfile
+  --debug               print debug logs
+
+Below is an example output. On the left is the summary of each commit
+observed by this script and on the right is the score assigned to it.
+
+$./lts-triage-v2.py --repo ../../trusted-firmware-a/ --lts lts-v2.10 --csv_path ./lts-v2.10-report.csv
+  --gerrit_user arthur-she --ssh_keyfile ~/.ssh/id_rsa.pub
+fix(cpus): fix a defect in Cortex-A715 erratum 2561034:    1
+fix(cpus): workaround for Cortex-A715 erratum 2413290:    2
+docs: add documentation for `entry_point_info`:    0
+build(npm): fix Commitizen ES Module errors:    0
+build(npm): adhere to Husky deprecation notice:    0
+fix(misra): fix MISRA defects:    0
+refactor(cm): couple el2 registers with dependent feature flags:    0
+fix(tc): do not use r0 for HW_CONFIG:    0
+fix(cpus): workaround for Cortex-A715 erratum 2344187:    2
+fix(cpus): workaround for Cortex-X4 erratum 2701112:    2
+fix(gic600): workaround for Part 1 of GIC600 erratum 2384374:    0
+fix(cpus): workaround for Cortex-A715 erratum 2331818:    2
+fix(arm): move console flush/switch in common function:    0
+fix(cpus): workaround for Cortex-A715 erratum 2420947:    2
+## stopping because found common Gerrit Change-Id between the two branches:  I612338fd2896f3fe614f23d14f56d58d43318a11
diff --git a/lts/config.py b/lts/config.py
new file mode 100644
index 0000000..8caed11
--- /dev/null
+++ b/lts/config.py
@@ -0,0 +1,5 @@
+MESSAGE_TOKENS = r'fix\(cpus\)|revert\(cpus\)|fix\(errata\)|\(security\)|erratum|sden|workaround'
+CPU_PATH_TOKEN = r'lib/cpus/aarch(32|64)/.*\.S'
+CPU_ERRATA_TOKEN = r'^check_erratum_|^workaround_start|^workaround.*_start'
+DOC_PATH_TOKEN = r'docs/design/cpu-specific-build-macros.rst|lib/cpus/cpu-ops\.mk"'
+DOC_ERRATA_TOKEN = r'^^-\s*``ERRATA_'
diff --git a/lts/lts-triage-v2.py b/lts/lts-triage-v2.py
new file mode 100755
index 0000000..63fd719
--- /dev/null
+++ b/lts/lts-triage-v2.py
@@ -0,0 +1,197 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2022 Google LLC. All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+# quick hacky script to check patches if they are candidates for lts. it checks
+# only the non-merge commits.
+
+import os
+import git
+import re
+import sys
+import csv
+import argparse
+import json
+import subprocess
+from io import StringIO
+from unidiff import PatchSet
+from config import MESSAGE_TOKENS, CPU_PATH_TOKEN, CPU_ERRATA_TOKEN, DOC_PATH_TOKEN, DOC_ERRATA_TOKEN
+
+global_debug = False
+def debug_print(*args, **kwargs):
+    global global_var
+    if global_debug:
+        print(*args, **kwargs)
+
+def contains_re(pf, tok):
+    for hnk in pf:
+        for ln in hnk:
+            if ln.is_context:
+                continue
+            # here means the line is either added or removed
+            txt = ln.value.strip()
+            if tok.search(txt) is not None:
+                return True
+
+    return False
+
+def process_ps(ps):
+    score = 0
+
+    cpu_tok = re.compile(CPU_PATH_TOKEN)
+    doc_tok = re.compile(DOC_PATH_TOKEN)
+
+    for pf in ps:
+        if pf.is_binary_file or not pf.is_modified_file:
+            continue
+        if cpu_tok.search(pf.path) is not None:
+            debug_print("* change found in cpu path:", pf.path);
+            cpu_tok = re.compile(CPU_ERRATA_TOKEN)
+            if contains_re(pf, cpu_tok):
+                score = score + 1
+                debug_print("    found", CPU_ERRATA_TOKEN)
+
+        if doc_tok.search(pf.path) is not None:
+            debug_print("* change found in macros doc path:", pf.path);
+            doc_tok = re.compile(DOC_ERRATA_TOKEN)
+            if contains_re(pf, doc_tok):
+                score = score + 1
+                debug_print("    found", DOC_ERRATA_TOKEN)
+
+    return score
+
+def query_gerrit(gerrit_user, ssh_key_path, change_id):
+    ssh_command = [
+        "ssh",
+        "-o", "UserKnownHostsFile=/dev/null",
+        "-o", "StrictHostKeyChecking=no",
+        "-o", "PubkeyAcceptedKeyTypes=+ssh-rsa",
+        "-p", "29418",
+        "-i", ssh_key_path,
+        f"{gerrit_user}@review.trustedfirmware.org",
+        f"gerrit query --format=JSON change:'{change_id}'",
+        "repo:'TF-A/trusted-firmware-a'"
+    ]
+
+    try:
+        result = subprocess.run(ssh_command, capture_output=True, text=True, check=True)
+        output = result.stdout.strip().split("\n")
+        changes = [json.loads(line) for line in output if line.strip()]
+        # Create a dictionary with branch as key and URL as value
+        branches_urls = {change["branch"]: change["url"] for change in changes if "branch" in change and "url" in change}
+        return branches_urls
+
+    except subprocess.CalledProcessError as e:
+        print("Error executing SSH command:", e)
+        return {}
+
+# REBASE_DEPTH is the number of Change-Ids collected from the tip of the LTS
+# branch when looking for the commit that the current patch set is based on
+REBASE_DEPTH = 20
+
+
+## TODO: for case like 921081049ec3 where we need to refactor first for security
+#       patch to be applied then we should:
+#       1. find the security patch
+#       2. from that patch find CVE number if any
+#       3. look for all patches that contain that CVE number in commit message
+
+## TODO: similar to errata macros and rst file additions, we have CVE macros and rst file
+#       additions. so we can use similar logic for that.
+
+## TODO: for security we should look for CVE number regex match and if found flag it
+def main():
+    parser = argparse.ArgumentParser(prog="lts-triage.py", description="check patches for LTS candidacy")
+    parser.add_argument("--repo", required=True, help="path to tf-a git repo")
+    parser.add_argument("--csv_path", required=True, help="path including the filename for CSV file")
+    parser.add_argument("--lts", required=True, help="LTS branch, ex. lts-v2.8")
+    parser.add_argument("--gerrit_user", required=True, help="The user id to perform the Gerrit query")
+    parser.add_argument("--ssh_keyfile", required=True, help="The SSH keyfile")
+    parser.add_argument("--debug", help="print debug logs", action="store_true")
+
+    args = parser.parse_args()
+    lts_branch = args.lts
+    gerrit_user = args.gerrit_user
+    ssh_keyfile = args.ssh_keyfile
+    global global_debug
+    global_debug = args.debug
+
+    csv_columns = ["index", "commit id in the integration branch", "commit summary",
+                   "score", "Gerrit Change-Id", "patch link for the LTS branch",
+                   "patch link for the integration branch"]
+    csv_data = []
+    idx = 1
+
+    repo = git.Repo(args.repo)
+
+    # collect the LTS hashes in a list
+    lts_change_ids = set()  # Set to store Gerrit Change-Ids from the LTS branch
+
+    for cmt in repo.iter_commits(lts_branch):
+        # Extract Gerrit Change-Id from the commit message
+        change_id_match = re.search(r'Change-Id:\s*(\w+)', cmt.message)
+        if change_id_match:
+            lts_change_ids.add(change_id_match.group(1))
+
+        if len(lts_change_ids) >= REBASE_DEPTH:
+            break
+
+    for cmt in repo.iter_commits('integration'):
+        score = 0
+
+        # if we find a same Change-Id among the ones we collected from the LTS branch
+        # then we have seen all the new patches in the integration branch, so we should exit.
+        change_id_match = re.search(r'Change-Id:\s*(\w+)', cmt.message)
+        if change_id_match:
+            change_id = change_id_match.group(1)
+            if change_id in lts_change_ids:
+                print("## stopping because found common Gerrit Change-Id between the two branches: ", change_id)
+                break;
+
+        # don't process merge commits
+        if len(cmt.parents) > 1:
+            continue
+
+        tok = re.compile(MESSAGE_TOKENS, re.IGNORECASE)
+        if tok.search(cmt.message) is not None:
+            debug_print("## commit message match")
+            score = score + 1
+
+        diff_text = repo.git.diff(cmt.hexsha + "~1", cmt.hexsha, ignore_blank_lines=True, ignore_space_at_eol=True)
+        ps = PatchSet(StringIO(diff_text))
+        debug_print("# score before process_ps:", score)
+        score = score + process_ps(ps)
+        debug_print("# score after process_ps:", score)
+
+        ln = f"{cmt.summary}:    {score}"
+        print(ln)
+
+        if score > 0:
+            gerrit_links = query_gerrit(gerrit_user, ssh_keyfile, change_id)
+            # Append data to CSV
+            csv_data.append({
+                "index": idx,
+                "commit id in the integration branch": cmt.hexsha,
+                "commit summary": cmt.summary,
+                "score": score,
+                "Gerrit Change-Id": change_id,
+                "patch link for the LTS branch": gerrit_links.get(lts_branch, "N/A"),
+                "patch link for the integration branch": gerrit_links.get("integration", "N/A")
+            })
+            idx += 1
+            at_least_one_match = True
+
+    if at_least_one_match == True:
+        try:
+            with open(args.csv_path, "w", newline='') as csvfile:
+                writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
+                writer.writeheader()
+                for data in csv_data:
+                    writer.writerow(data)
+        except:
+            print("\n\nERROR: Couldn't open CSV file due to error: ", sys.exc_info()[0])
+
+if __name__ == '__main__':
+    main()