Visualize lines of source code by module This visualization generates a stacked bar chart depicting lines of code within each module broken down by language. Change-Id: I7a14a6dcf38a6611c41e21182177bb23a4baf2a2

commit: c7e28f57aed5fb28b14c19b8a172aa0cc5b9cb8f [log] [tgz]
author: Jimmy Brisson <jimmy.brisson@arm.com> Thu Jan 21 16:23:21 2021 -0600
committer: Zelalem Aweke <zelalem.aweke@arm.com> Tue Feb 02 23:22:17 2021 +0000
tree: 3143dc43c47aa1b41fdf6250b8cd5ce10a702825
parent: cb545bda25e975668808334070941a41d4f6210b [diff]
diff --git a/script/graphs/README.rst b/script/graphs/README.rst
index 10724cb..f9db4a1 100644
--- a/script/graphs/README.rst
+++ b/script/graphs/README.rst

@@ -18,4 +18,23 @@
 
     bash categorize-tests.bash juno > juno-tests.png 2> juno-tests.txt
 
+Lines of Code by Module
+-----------------------
+
+The script ``sloc-viz.bash``, and its associated plot script, generate a stacked
+bar chart where each bar is a module and the bars' segments represent programming
+languages (or documentation languages). This script will produce a graph for
+whatever directory it's run within, and has special logic that includes more
+detail when run from the Trusted Firmware - A project's root directory.
+
+This script has additional requirements:
+* ``tokei`` - a quick source lines of code counting tool
+* ``jq`` - a JSON query language for the command line, version 1.6 or later
+  as the ``--jsonargs`` option is required
+
+For example, when run from the root of TF-A, the following commandline will graph
+sloc of TF-A:
+
+    bash ../<this-repo>/script/graphs/sloc-viz.bash > sloc.png 2> sloc.tsv
+
 *Copyright (c) 2021, Arm Limited. All rights reserved.*

diff --git a/script/graphs/sloc-viz.bash b/script/graphs/sloc-viz.bash
new file mode 100644
index 0000000..135b4cf
--- /dev/null
+++ b/script/graphs/sloc-viz.bash

@@ -0,0 +1,85 @@
+#
+# Copyright (c) 2021 Arm Limited. All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Overview
+# ========
+#
+# This script generates source lines of code as a tab separated values (TSV)
+# file and a stacked bar chart. It uses `tokei` for gathering the data, and
+# `gnuplot` for generating the plot. The data is available on stderr and the
+# plot will be put in stdout.
+#
+# This script generates information about the directory that it's run in,
+# aggregated by subdirectory.
+# It is recommended that you run it from within the TF-A root directory for
+# best results.
+
+# Variables
+# =========
+
+# Command that converts newlines to tabs; expanded unquoted so it word-splits
+n2t="tr \n \t"
+
+# We will build the final data file incrementally throughout the script. We need
+# a place to store it temporarily; mktemp creates it in the current directory.
+data=$(mktemp XXXXXX-sloc.tsv)
+
+# Top level directories (hidden ones excluded) that we consider by themselves.
+toplevel=$(find -mindepth 1 -maxdepth 1 -type d -and ! -name ".*" | sed "s|./||g")
+
+# Second level directories of drivers/ and plat/; `|| true` tolerates a failing find.
+secondlevel=$(find drivers plat -mindepth 1 -maxdepth 1 -type d || true)
+
+# We want to be sure that we always put the data in the same order, with the
+# same keys in the resulting TSV file. To ensure this, we keep a json-encoded
+# array of the categories we would like to show in the graph.
+# This was generated by taking the output of `tokei --output json | jq keys`
+# and trimming out things that we don't really need like "Svg"
+categories='["AssemblyGAS", "C", "CHeader", "DeviceTree", "Makefile", "Python", "ReStructuredText"]'
+
+# Data File Generation
+# ====================
+#
+# Below we generate the data file used for the graph. The table is a
+# tab separated values (TSV) matrix with columns of code language (C, Python,
+# etc), and rows of subdirectories of TF-A that contain the code.
+
+# Column headers
+# --------------
+(echo module; echo $categories | jq ".[]" ) | $n2t  > $data
+# terminate the header row with a newline
+echo >> $data
+
+# Build Each Row
+# --------------
+for dir in $toplevel $secondlevel; do
+	# Gnuplot likes to treat underscores as a syntax for subscripts. This
+	# looks weird, as module names are not named with this syntax in mind.
+	# Further, it turns out that we go through 3 expansions, so we need 8 (2^3)
+	# backslashes.
+	echo $dir | sed -e "s/_/\\\\\\\\_/g" | $n2t >> $data
+	# This is the heart of the implementation, and probably the most
+	# complicated line in this script. First, we generate the subdirectory
+	# sloc with tokei, in json format. We then filter it with jq. The jq
+	# filter is a foreach loop where we iterate over $x = column name, as
+	# passed in as the first positional argument. Each iteration through
+	# the loop, we print out the code value, when it exists, or null + 0.
+	# This takes advantage of the property of null:
+	#  > null can be added to any value, and returns the other value
+	#  > unchanged.
+	tokei --output json $dir \
+		| jq '$ARGS.positional[0][] as $x | .[$x].code + 0' \
+			--jsonargs "$categories" \
+		| $n2t >> $data
+	echo  >> $data
+done
+
+cat $data 1>&2
+gnuplot -c ${0%bash}plot $data
+
+rm $data

diff --git a/script/graphs/sloc-viz.plot b/script/graphs/sloc-viz.plot
new file mode 100644
index 0000000..a334b89
--- /dev/null
+++ b/script/graphs/sloc-viz.plot

@@ -0,0 +1,23 @@
+#
+# Copyright (c) 2021 Arm Limited. All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# Input: ARG1, the path of a TSV file whose first row holds the column titles.
+# Stacked histograms
+# No `set output` is given, so the PNG is written to gnuplot's standard output.
+set terminal png enhanced font ",18" size 1920, 1080
+set title "Source Lines of Code by Module"
+set key invert reverse Left outside
+set key autotitle columnheader
+set auto y
+set auto x
+unset xtics
+set xtics nomirror rotate by -75 scale 0
+set style data histogram
+set style histogram rowstacked
+set style fill solid border -1
+set boxwidth 0.75
+# Column 1 labels the x axis; columns 2 through 8 are stacked within each bar.
+plot ARG1 using 2:xtic(1), for [i=3:8] '' using i
+#
commit	c7e28f57aed5fb28b14c19b8a172aa0cc5b9cb8f	[log] [tgz]
author	Jimmy Brisson <jimmy.brisson@arm.com>	Thu Jan 21 16:23:21 2021 -0600
committer	Zelalem Aweke <zelalem.aweke@arm.com>	Tue Feb 02 23:22:17 2021 +0000
tree	3143dc43c47aa1b41fdf6250b8cd5ce10a702825
parent	cb545bda25e975668808334070941a41d4f6210b [diff]