refactor: miscellaneous fixes to visualizations

This change introduces some minor refactoring work to both
visualization scripts, namely:

- Comments reflowed to fill up to 80 characters per line
- Fixes to the category names for more recent versions of Tokei
- GNUPlot scripts no longer require a fixed number of columns
- Whitespace inconsistency fixes

SLOC-specific changes:

- Updated extended color palette to prevent color cycling
- Search directories can now be passed via arguments
- Parent directories no longer include their children's stats

Change-Id: I3f0ecb849a97f1f8008423772ddde4ca8c4771c2
Signed-off-by: Chris Kay <chris.kay@arm.com>
Co-authored-by: Weronika Wiesiolek <weronika.wiesiolek@arm.com>
diff --git a/script/graphs/sloc-viz.bash b/script/graphs/sloc-viz.bash
index 6bef76f..bc05da2 100644
--- a/script/graphs/sloc-viz.bash
+++ b/script/graphs/sloc-viz.bash
@@ -1,9 +1,11 @@
+#!/usr/bin/env bash
+
 #
-# Copyright (c) 2021 Arm Limited. All rights reserved.
+# Copyright (c) 2021-2022 Arm Limited. All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 #
-#!/usr/bin/env bash
+
 set -euo pipefail
 
 # Overview
@@ -16,69 +18,101 @@
 #
 # This script generates information about the directory that it's run in,
 # aggregated by subdirectory.
+#
 # It is recommended that you run it from within the TF-A root directory for
 # best results.
 
-# Variables
+# Functions
 # =========
 
-# convert newlines to tabs
-n2t="tr \n \t"
+# Convert newlines to tabs
+n2t() {
+	tr "\n" "\t"
+}
+
+# Strip one trailing tab from each line (e.g. the one left behind by n2t)
+strip() {
+	sed 's/\t$//'
+}
+
+# Variables
+# =========
 
 # We will build the final data file incrementally throughout the script. We need
 # A place to store this data, temporarily, so mktemp fills the role.
 data=$(mktemp XXXXXX-sloc.tsv)
 
-# Top level TF-A directories that we consider by themselves.
-toplevel=$(find -mindepth 1 -maxdepth 1 -type d -and ! -name ".*" | sed "s|./||g")
+# Directories to search: the script's arguments, or these defaults if none
+analyze=("${@:-"." "./drivers" "./plat" "./tools"}")
 
-# Second level TF-A directories that we consider separately.
-secondlevel=$(find drivers plat -mindepth 1 -maxdepth 1 -type d || true)
+# Every directory in analyze plus its immediate, non-hidden subdirectories
+readarray -t dirs < <(find ${analyze[@]} -maxdepth 1 -type d -not -path '*/\.*' | sort -u)
 
 # We want to be sure that we always put the data in the same order, with the
 # same keys in the resulting TSV file. To ensure this, we keep a json-encoded
 # array of the categories we would like to show in the graph.
-# This was generated by taking the output of `tokei --output json | jq keys`
-# and trimming out things that we don't really need like "Svg"
-categories='["AssemblyGAS", "C", "CHeader", "DeviceTree", "Makefile", "Python", "ReStructuredText"]'
+#
+# This is derived at run time from `tokei --output json | jq keys`, dropping the
+# categories that we don't really need, like "SVG".
+categories=$(tokei --output json | jq 'keys - ["Total", "Autoconf", "CSS", "JSON", "Module-Definition", "Plain Text", "SVG", "SWIG", "XML" ]')
 
 # Data File Generation
 # ====================
 #
-# Below we generate the data file used for the graph. The table is a
-# tab separated value(TSV) matrix with columns of code language (Bash, C, etc),
+# Below we generate the data file used for the graph. The table is a tab
+# separated value (TSV) matrix with columns of code language (Bash, C, etc.),
 # and rows of subdirectories of TF-A that contain the code.
 
 # Column headers
 # --------------
-(echo module; echo $categories | jq ".[]" ) | $n2t  > $data
-# add a newline
-echo >> $data
+(echo "Module"; echo ${categories} | jq ".[]" ) | n2t  > "${data}"
+echo >> "${data}"
 
 # Build Each Row
 # --------------
-for dir in $toplevel $secondlevel; do
+for dir in "${dirs[@]}"; do
+	# Don't process directories that are ignored by Git
+	if git check-ignore -q "${dir}"; then
+		continue
+	fi
+
 	# Gnuplot likes to treat underscores as a syntax for subscripts. This
 	# looks weird, as module names are not named with this syntax in mind.
-	# Further, it turns out that we go through 3 expansions, so we need 8 (2^3)
-	# backslashes.
-	echo $dir | sed -e "s/_/\\\\\\\\_/g" | $n2t >> $data
+	# Further, it turns out that we go through 3 expansions, so we need 8
+	# (2^3) backslashes.
+	echo "${dir}" | sed -e "s/_/\\\\\\\\_/g" | n2t >> "${data}"
+
+	# Additional arguments to Tokei
+	args=()
+
+	# Don't include the statistics of this directory's children in its own
+	# statistics if they are going to be analyzed separately.
+	readarray -t excludes < <(printf '%s\n' "${dirs[@]}" | grep "${dir}/")
+
+	for exclude in "${excludes[@]}"; do
+		# Tokei uses gitignore syntax, so we need to strip the leading
+		# period.
+		args+=(--exclude "${exclude#.}")
+	done
+
 	# This is the heart of the implementation, and probably the most
 	# complicated line in this script. First, we generate the subdirectory
-	# sloc with tokei, in json format. We then filter it with jq. The jq
+	# sloc with tokei, in JSON format. We then filter it with jq. The jq
 	# filter iterates over the column names as saved in the categories
 	# variable. Each iteration through the loop, we print out the code
 	# value, when it exists, or null + 0. This takes advantage of the
-	# property of null:
-	#  > null can be added to any value, and returns the other value
-	#  > unchanged.
-	tokei --output json $dir \
-	        | jq " .[$categories[]].code + 0" \
-		| $n2t >> $data
-	echo  >> $data
+	# property of null:
+	#
+	# > null can be added to any value, and returns the other value
+	# > unchanged.
+	tokei "${dir}" --output json "${args[@]}" \
+		| jq " .[${categories}[]].code + 0" \
+		| n2t | strip >> "${data}"
+
+	echo >> "${data}"
 done
 
-cat $data 1>&2
-gnuplot -c ${0%bash}plot $data
+cat "${data}" 1>&2
+gnuplot -c "${0%bash}plot" "${data}"
 
-rm $data
+rm "${data}"