summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndy Green <andy.green@linaro.org>2015-08-17 21:18:06 +0800
committerAndy Green <andy.green@linaro.org>2015-08-17 21:30:55 +0800
commitf4ca2de7664ef9478850ed25872de4a4399b6a36 (patch)
treef545964b5b73eb0b3f8f5f6183573749fff37865
-rwxr-xr-xgrowth.sh658
1 files changed, 658 insertions, 0 deletions
diff --git a/growth.sh b/growth.sh
new file mode 100755
index 0000000..c716b84
--- /dev/null
+++ b/growth.sh
@@ -0,0 +1,658 @@
+#!/bin/bash
+#
+# growth.sh
+#
+# Copyright (C) 2015 Linaro, Ltd
+# Andy Green <andy.green@linaro.org>
+# Licensed under GPL2.1
+#
+# Please run the script with no args to get comprehensive help
+#
+# Note on sqlite3 usage
+#
+# The sqlite3 db generated here is just caching analysis the script
+# generated itself. You can delete it and the script will recreate an
+# empty one automatically, but you will have to regenerate the runs
+# that were stored in it.
+#
+# Having the data cached there is helpful both in making complex queries
+# that are difficult to reproduce using cut, sed, sort etc and in allowing
+# quick development of new graphs and queries without the cost of generating
+# the data each time.
+
+
+# sqlite3 cache database file, relative to the current directory
+DB="growth.sq3"
+# version number stored with every row added to the runs table
+SCHEMA_VER="1"
+
+# basis point commit; set by basis_point() and make_stat()
+BP=""
+# NOTE(review): DIRCOL is only mentioned in a comment further down, no
+# code in this script reads it
+DIRCOL="0"
+
+# start with an empty .cols.tmp and no leftover .first-phase marker
+rm -f .cols.tmp .first-phase
+touch .cols.tmp
+
+# Run SQL against the cache database $DB using the sqlite3 CLI.
+# $1: SQL text
+# Outputs: query results on stdout; on failure "sqlite error" and the
+#          offending SQL go to stderr so that callers capturing stdout
+#          do not mistake them for query results.
+# Exits with status 1 when sqlite3 fails.  When called inside backticks
+# or $( ) only that subshell exits, the caller carries on.
+function sq3()
+{
+    if ! sqlite3 "$DB" "$1" ; then
+        >&2 echo "sqlite error"
+        # printf + quoting: print the statement verbatim, without word
+        # splitting or glob expansion of characters such as '*'
+        >&2 printf '%s\n' "$1"
+        exit 1
+    fi
+}
+
+# number of preset columns before dir ones
+# NOTE(review): OFFSET_COLS is assigned here but not read anywhere else in
+# the script as shown - confirm before relying on it.
+OFFSET_COLS=8
+
+# NOTE(review): the two comment lines below describe a helper that is not
+# present in the script; no function follows them.
+# return col num of dirname in DIRCOL
+# $1: dirname
+
+# Find the point where a tree left its basis branch.
+# $1: basis branch, $2: starting tree
+#
+# Compares the first-parent commit lists of both refs and stores the first
+# commit (in rev-list order) that appears in both in the global BP.  BP is
+# left empty when diff prints no common line, for instance when the two
+# lists are identical.  Progress is echoed on stdout.
+
+function basis_point()
+{
+    echo basis_point "$1" "$2"
+
+    # In unified diff output the lines starting with a space are the ones
+    # shared by both inputs; the first of them is the basis point.
+    BP=$(diff -u <(git rev-list --first-parent "$2") \
+                 <(git rev-list --first-parent "$1") | \
+        sed -ne 's/^ //p' | head -n 1)
+
+    echo result "$BP"
+}
+
+# Record one snapshot (the diff of a ref against its basis point) in the db.
+# $1: basis branch, $2: comparison ref, $3: basis point if known (when empty
+# it is discovered with basis_point), $4: index in sequence (not used by
+# this function), $5: run_key we are attached to
+#
+# Inserts one row into snapshots and one row into dir_summary for every
+# path in the diff that is not binary and does not contain "gitignore".
+# Sets the global BP.  Returns 1 without touching the db when no basis
+# point can be determined.
+
+function make_stat()
+{
+    local CB
+    local BASIS
+    local COMP
+    local DS
+    local DS_BASIS
+    local F FILES ADD REM SKEY DEPTH SLASHES
+    local RE_FILES='([0-9]+) file'
+    local RE_ADD='([0-9]+) insertion'
+    local RE_REM='([0-9]+) deletion'
+
+    CB="$2"
+    if [ -z "$3" ] ; then
+        basis_point "$1" "$CB"
+    else
+        BP=$3
+    fi
+
+    # an empty BP would silently make git compare against HEAD instead
+    if [ -z "$BP" ] ; then
+        >&2 echo "no basis point found for $CB, skipping"
+        return 1
+    fi
+
+    BASIS=$(git describe "$BP")
+    COMP=$(git describe "$CB")
+    DS=$(git log -n 1 "$CB" --format=format:%ct)
+    DS_BASIS=$(git show "$BP" --format=format:%ct | head -n1)
+
+    echo "git diff $BP..$CB --shortstat"
+
+    # --shortstat leaves out the "insertions" or "deletions" part when that
+    # count is zero and prints nothing at all for an empty diff, so each
+    # number is picked by its label, not its position, and defaults to 0.
+    F=$(git diff "$BP..$CB" --shortstat)
+    FILES=0
+    ADD=0
+    REM=0
+    if [[ "$F" =~ $RE_FILES ]] ; then
+        FILES=${BASH_REMATCH[1]}
+    fi
+    if [[ "$F" =~ $RE_ADD ]] ; then
+        ADD=${BASH_REMATCH[1]}
+    fi
+    if [[ "$F" =~ $RE_REM ]] ; then
+        REM=${BASH_REMATCH[1]}
+    fi
+
+    sq3 "insert into snapshots (run_idx, ref_name, ref_date, \
+        basis_name, basis_date, files_changed, \
+        loc_added, loc_removed) \
+        values ($5, \"$COMP\", $DS, \"$BASIS\", $DS_BASIS, \
+        $FILES, $ADD, $REM);"
+    # key of the row just inserted, needed to attach the per-path rows
+    SKEY=$(sq3 "select seq from sqlite_sequence where \
+        name=\"snapshots\"")
+
+    # One numstat line per path: <added> <removed> <path>.
+    # The path is deliberately taken as the third whitespace separated
+    # field, as before.  The loop is fed by process substitution instead of
+    # a pipe so that the exit in sq3 stops the script on a db error.
+    while read -r ADD REM F _ ; do
+
+        # binary files report "-" instead of line counts
+        if [[ "$ADD" != "-" && "$F" != *gitignore* ]] ; then
+
+            # depth = number of '/' in the path plus one
+            SLASHES=${F//[^\/]/}
+            DEPTH=$(( ${#SLASHES} + 1 ))
+
+            sq3 "insert into dir_summary (snap_idx, run_key, dir_name, \
+                dir_depth, loc_added, loc_removed) \
+                values ($SKEY, $5, \"$F\", $DEPTH, $ADD, $REM);"
+        fi
+    done < <(git diff "$BP..$CB" --numstat)
+
+}
+
+# Write a gnuplot script for a time based graph to plot.tmp and run it.
+# $1: gnuplot plot expression (data file, columns and style),
+# $2: output png file, $3: title
+# The x axis is read as unix timestamps (timefmt "%s").
+
+function issue_plot_time()
+{
+    cat >plot.tmp <<EOF
+set terminal pngcairo notransparent enhanced font "arial,10" size 660, 320
+set output '$2'
+set title "$3"
+set yrange [ 0 : ] noreverse nowriteback
+set lmargin 9
+set rmargin 2
+set autoscale xfixmin
+set xdata time
+set termoption dash
+set timefmt "%s"
+set key autotitle columnhead
+plot $1
+EOF
+
+    # report the file being produced, which is \$2 (\$3 is the title)
+    >&2 echo "creating $2"
+    gnuplot plot.tmp
+}
+
+# Write a gnuplot script for a per-directory histogram to plot.tmp and
+# run it.
+# $1: gnuplot plot expression, $2: upper limit of the y axis,
+# $3: output png file, $4: title, $5: dimensions as "width,height",
+# $6: label text placed at the top right of the graph
+
+function issue_plot_file_dist()
+{
+    local plot_expr="$1"
+    local y_max="$2"
+    local out_png="$3"
+    local title="$4"
+    local dims="$5"
+    local label="$6"
+
+    cat >plot.tmp <<EOF
+set terminal pngcairo notransparent enhanced font "arial,10" size $dims
+set output '$out_png'
+set title "$title"
+set yrange [ : $y_max ] noreverse nowriteback
+set lmargin 9
+set rmargin 2
+set autoscale xfixmin
+#set xdata time
+set termoption dash
+set timefmt "%s"
+set key autotitle columnhead
+set style fill transparent solid 0.5 noborder
+set xtics rotate
+set style data histogram
+set label "$label" at graph 0.99,0.8 right
+plot $plot_expr
+EOF
+
+    echo "creating $out_png" >&2
+    gnuplot plot.tmp
+}
+
+
+# create schema
+#
+# These statements run on every invocation, before the arguments are
+# looked at, so the db file is created even when only the help is printed.
+# "if not exists" makes them harmless on an existing db.
+
+# one of these for each comparison run
+#
+# As filled in by --tags mode: basis_hash holds the basis branch name
+# given on the command line and comp_hash the tag regexp.  The tags
+# column is not written by any code in this script.
+
+sq3 "create table if not exists runs (\
+ run_key integer primary key autoincrement, \
+ basis_hash varchar(50), \
+ comp_hash varchar(50), \
+ tags integer, \
+ schema_ver integer \
+);"
+
+# one of these for each snapshot compared
+#
+# Written by make_stat: run_idx is the run_key of the owning run, the
+# *_name columns come from git describe, the *_date columns are commit
+# timestamps (%ct) and the last three are the git diff --shortstat totals.
+
+sq3 "create table if not exists snapshots (\
+ snap_idx integer primary key autoincrement, \
+ run_idx integer, \
+ ref_name varchar(50), \
+ ref_date integer, \
+ basis_name varchar(50), \
+ basis_date integer, \
+ files_changed integer, \
+ loc_added integer, \
+ loc_removed integer \
+);"
+
+# one of these for each dir changed in the snapshot
+# we have run_key here as well since it simplifies finding all paths
+#
+# Written by make_stat, one row per git diff --numstat line: dir_name
+# holds the path reported by numstat and dir_depth is the number of '/'
+# in it plus one.
+
+sq3 "create table if not exists dir_summary (\
+ key integer primary key autoincrement, \
+ snap_idx integer, \
+ run_key integer, \
+ dir_name varchar(150), \
+ dir_depth integer, \
+ loc_added integer, \
+ loc_removed integer \
+);"
+
+
+# With no arguments: print the comprehensive help on stderr and exit 1.
+# The text is a here-document; \$ produces the literal shell prompt
+# character while $0 expands to the name the script was started with.
+if [ -z "$1" ] ; then
+    cat >&2 <<EOF
+Usage: $0 <basis branch> --tags <tag regexp>
+ $0 --plot <run #> [ subdir ]
+ $0 --plot <run # a> < - | subdir > <run # b>
+
+$0 can be run in two modes, either create a 'run' in
+the sqlite3 db cache, or create graphs about one or
+comparing two runs already in the db cache
+
+Creating a 'run' from one or more tags
+--------------------------------------
+
+A 'run' is created by studying one or more tags against
+a 'basis branch' to isolate the patches on top of the
+tag's basis point. So if you have a kernel branch that
+is tracking mainline, the various tags you have on that
+kernel branch may be based on different mainline versions.
+$0 can autodiscover for each tag where the basis point is
+if you just give him the basis branch name, eg, 'mainline'.
+
+ \$ $0 mainline --tags mybranch-tagname-regexp
+
+Notice that the tag name to analyze on one 'run' is a regexp.
+It's fine to have many tags analyzed in one 'run'.
+
+When the run starts, the run number is reported and you
+should make a note of it
+
+
+Plotting graphs from one or two runs
+------------------------------------
+
+After the analysis for the tags you are interested in has
+been captured into 'runs' in the sqlite3 db cache, you can
+run the script to produce png and gif graphs showing or
+comparing the data from different runs.
+
+There's no requirement at all that the different runs have
+anything in common in their history, basis or content,
+giving a lot of flexibility in the comparisons.
+
+To produce graphs about one run itself:
+
+ \$ $0 --plot <run #> [ subdir ]
+
+If subdir is missing, the whole tree is analysed, if given
+the analysis is restricted to the subdirectory given.
+
+To produce graphs comparing two runs:
+
+ \$ $0 --plot <run # a> < - | subdir > <run # b>
+
+If there is no subdir restriction, - must be given.
+<run # b> must contain only one tag in this case.
+
+Graphs will be produced with the union of information
+from run a and run b, showing run a in blue and run b in red.
+EOF
+
+    exit 1
+fi
+
+# Third argument: subdir filter used by plot mode, "-" meaning no filter.
+# (In --tags mode the third argument is the tag regexp; the values worked
+# out here are only read by plot mode.)
+FILTER=$3
+if [ "$FILTER" = "-" ] ; then
+    FILTER=
+fi
+FILTERLEN=${#FILTER}
+
+# wc -c also counts the newline from echo, so this is the number of '/'
+# in the filter plus one
+F_DEPTH=$(echo "$FILTER" | sed "s|[^/]||g" | wc -c)
+
+# LEVELS is the list of '/' separated path fields that plot mode keeps
+# with cut(1); a deeper filter keeps more fields
+LEVELS=1,2
+if [ -n "$FILTER" ] ; then
+    if [ $F_DEPTH = 1 ] ; then
+        LEVELS=1,2,3
+    elif [ $F_DEPTH = 2 ] ; then
+        LEVELS=1,2,3,4
+    elif [ $F_DEPTH = 3 ] ; then
+        LEVELS=1,2,3,4,5
+    else
+        LEVELS=1,2,3,4,5,6
+    fi
+fi
+
+#
+# plot mode
+#
+# $2: run # to plot (required)
+# $3: "-" or a subdir filter; already parsed into FILTER, FILTERLEN and
+#     LEVELS before this point
+# $4: optional run # to compare against, which must hold one snapshot
+#
+# Writes growth-<R>-LOC.png, growth-<R>-basis-age.png, one
+# growth-<R>-dist-[<filter>-]NNNN.png per snapshot and growth-<R>-dist.gif
+# into the current directory.  .plot.*, .biggest and plot.tmp are scratch
+# files.
+#
+
+if [ "$1" = "--plot" ] ; then
+
+    PLOT_RUN=$2
+    COMP_RUN=$4
+
+    # without a run number every query below would be malformed
+    if [ -z "$PLOT_RUN" ] ; then
+        >&2 echo "Need run # with --plot"
+        exit 1
+    fi
+
+    # R labels the run(s) in graph titles and output file names:
+    # <comp_hash>-<basis_hash>, followed by -VS-<comp_hash>-<basis_hash>
+    # of the comparison run when there is one
+    R=`sq3 "select comp_hash,basis_hash from runs where run_key=$PLOT_RUN"|\
+        tr '|' '-'`
+
+    #
+    # get a list of snapshot idxs for both runs combined
+    #
+
+    # NOTE(review): RUN_IDX_COMP is set but only RUN_KEY_COMP is used below
+    if [ ! -z "$COMP_RUN" ] ; then
+        RUN_IDX_COMP="(run_idx=$PLOT_RUN or run_idx=$COMP_RUN)"
+        RUN_KEY_COMP="(run_key=$PLOT_RUN or run_key=$COMP_RUN)"
+        CR=`sq3 "select comp_hash,basis_hash from runs where \
+            run_key=$COMP_RUN" | tr '|' '-'`
+        R="$R"-VS-$CR
+    else
+        RUN_IDX_COMP="run_idx=$PLOT_RUN"
+        RUN_KEY_COMP="run_key=$PLOT_RUN"
+    fi
+    echo $R
+    # our snapshots
+    SN=`sq3 "select snap_idx from snapshots where run_idx=$PLOT_RUN"`
+
+    # there's a comparison snapshot?
+    SNC=
+    if [ ! -z "$COMP_RUN" ] ; then
+        SNC=`sq3 "select snap_idx from snapshots where run_idx=$COMP_RUN"`
+        COUNT=
+        for i in $SNC ; do
+            if [ ! -z "$COUNT" ] ; then
+                >&2 echo "Must be single comparison snapshot"
+                exit 1
+            fi
+            COUNT=x
+        done
+    fi
+
+    rm -f .plot.tmp
+    rm -f .plot.cols .plot.cols1
+    rm -f .plot.dist
+    # the lists are only appended to below; make sure they exist even when
+    # the query matches nothing
+    touch .plot.cols .plot.cols1
+
+    #
+    # using both runs if two given,
+    # create the column header row, and fill .plot.cols with the
+    # list of files / dirs changed in this view of the diff
+    #
+
+    echo -n "basis_name basis_date ref_name ref_date files add del " \
+        > .plot.tmp
+    sq3 "select dir_name from dir_summary where \
+        $RUN_KEY_COMP and \
+        substr(dir_name, 1, $FILTERLEN)=\"$FILTER\" \
+        order by loc_added,loc_removed asc" | \
+        cut -d'/' -f$LEVELS | while read i ; do
+        # everything below Documentation/ is folded into one column
+        if [ ! -z "`echo "$i" | grep ^Documentation/`" ] ; then
+            echo "Documentation" >> .plot.cols1
+        else
+            if [ "$i" != "." ] ; then
+                # don't allow individual files
+                # (-d looks at the tree currently checked out in the
+                # working directory)
+                if [ ! -d "$i" ] ; then
+                    dirname $i >> .plot.cols1
+                else
+                    echo $i >> .plot.cols1
+                fi
+            fi
+        fi
+    done
+
+    #
+    # put the column titles in place and write out the
+    # filtered list of files/dirs we will care about
+    #
+    cat .plot.cols1 | sort | uniq | while read i ; do
+        echo -n "$i " >> .plot.tmp
+        echo $i >> .plot.cols
+    done
+    echo >> .plot.tmp
+
+    #
+    # find out how many snapshots created by the run he's using
+    # it doesn't include any comparison snapshot
+    #
+    N=0
+    for i in $SN ; do
+        N=$(( $N + 1 ))
+    done
+
+    # how many files were changed
+    CHANGEDFILES=`wc -l .plot.cols | cut -d' ' -f1`
+
+    >&2 echo "Studying $N snapshots"
+    >&2 echo "Total $CHANGEDFILES files changed"
+
+    #
+    # For each snapshot, go through the list of changed files/dirs and
+    # find out how much changed there in that snapshot
+    #
+    # NOTE(review): the substr() prefix tests here and in the next section
+    # have no trailing '/', so a column such as arch/arm also collects
+    # paths below arch/arm64 - confirm whether that is intended.
+    #
+    T=1
+    for i in $SN ; do
+        L=`sq3 "select basis_name, basis_date, ref_name, \
+            ref_date, files_changed, loc_added, loc_removed\
+            from snapshots where snap_idx=$i" | tr '|' ' '`
+        # the trailing space keeps loc_removed apart from the first
+        # per-directory value appended below
+        echo -n "$L " >> .plot.tmp
+
+        >&2 echo -n -e "Snapshot $T/$N: `echo $L | cut -d' ' -f3` \r"
+        T=$(( $T + 1 ))
+
+        cat .plot.cols | while read j ; do
+            JLEN=${#j}
+
+            # are we going to deal with his subdirs?
+            if [ -z "`cat .plot.cols | grep "$j/"`" ] ; then
+
+                # everything inside this dir
+                A=`sq3 "select sum(loc_added) \
+                    from dir_summary where \
+                    snap_idx=$i and \
+                    substr(dir_name, 1, $JLEN)=\"$j\"\
+                    " | head -n1`
+
+            else
+                # it's truncated, so only files in this dir
+                # eg arch, but arch/arm is handled elsewhere
+
+                DEP=$(( `echo "$j" | sed "s|[^/]||g" | wc -c` + 1 ))
+                A=`sq3 "select sum(loc_added) \
+                    from dir_summary where\
+                    snap_idx=$i and \
+                    substr(dir_name, 1, $JLEN)=\"$j\" and \
+                    dir_depth=$DEP" | head -n1`
+            fi
+
+            if [ -z "$A" ] ; then
+                echo -n "0 " >> .plot.tmp
+            else
+                echo -n "$A " >> .plot.tmp
+            fi
+        done
+        echo >> .plot.tmp
+    done
+
+    >&2 echo
+
+    #
+    # for plots related to changes over time, we can do them now
+    #
+
+    issue_plot_time "'.plot.tmp' using \
+        4:6 notitle with filledcurve y1=0 lc rgb \"#0000ff\"" \
+        "growth-$R-LOC.png" "$R growth in LOC"
+
+    issue_plot_time "'.plot.tmp' using \
+        4:( (\$4-\$2)/(24 * 3600) ) notitle \
+        with filledcurve y1=0 lc rgb \"#0000ff\"" \
+        "growth-$R-basis-age.png" "$R growth basis age (days)"
+
+    rm -f .plot.tmp1
+    # sort below needs the file to exist even when nothing is appended
+    touch .plot.tmp1
+
+    #
+    # for each file / dir that has changes in any snapshot, for each
+    # snapshot calculate its changes and create a unified plot data file
+    #
+    # .biggest carries the largest value seen out of the loop, which runs
+    # in a pipeline subshell and so cannot set variables for later use
+    #
+    echo 0 > .biggest
+
+    T=1
+    cat .plot.cols | while read j ; do
+
+        >&2 echo -n -e "File $T/$CHANGEDFILES \r"
+        T=$(( $T + 1 ))
+
+        echo -n "$j " >> .plot.tmp1
+
+        BIGGEST=`cat .biggest`
+
+        JLEN=${#j}
+
+        for i in $SN $SNC ; do
+            if [ -z "`cat .plot.cols | grep "$j/"`" ] ; then
+                # everything inside the dir
+                A=`sq3 "select sum(loc_added) \
+                    from dir_summary where\
+                    snap_idx=$i and \
+                    substr(dir_name, 1, $JLEN)=\"$j\" \
+                    "|head -n1`
+
+                D=`sq3 "select sum(loc_removed) \
+                    from dir_summary where\
+                    snap_idx=$i and \
+                    substr(dir_name, 1, $JLEN)=\"$j\" \
+                    "|head -n1`
+            else
+                # it's truncated, so only files in this dir
+                # eg arch, but arch/arm is handled elsewhere
+                DEP=$(( `echo "$j" | sed "s|[^/]||g" | wc -c` + 1 ))
+
+                A=`sq3 "select sum(loc_added) \
+                    from dir_summary where \
+                    snap_idx=$i and \
+                    substr(dir_name, 1, $JLEN)=\"$j\"\
+                    and dir_depth=$DEP" | head -n1`
+
+                D=`sq3 "select sum(loc_removed) \
+                    from dir_summary where \
+                    snap_idx=$i and \
+                    substr(dir_name, 1, $JLEN)=\"$j\" \
+                    and dir_depth=$DEP" | head -n1`
+            fi
+
+            # net growth = added - removed; 0 when nothing matched
+            if [ ! -z "$A" -a ! -z "$D" ] ; then
+                V=$(( $A - $D ))
+
+                echo -n "$V " >> .plot.tmp1
+
+                if [ $V -gt $BIGGEST ] ; then
+                    BIGGEST=$V
+                    echo $V > .biggest
+                fi
+            else
+                echo -n "0 " >> .plot.tmp1
+            fi
+        done
+
+        echo >> .plot.tmp1
+    done
+
+    >&2 echo
+
+    # .plot.tmp is rebuilt for the distribution plots: a header row, then
+    # one row per dir: <idx> <dir> <value per snapshot...>
+    echo -n "idx dir " > .plot.tmp
+    for i in $SN $SNC ; do
+        V="`sq3 "select ref_name \
+            from snapshots where snap_idx=$i"`"
+
+        echo -n "$V " >> .plot.tmp
+    done
+    echo >> .plot.tmp
+
+    # order the dirs by the value of the last snapshot of the plotted run,
+    # biggest first
+    C=0
+    sort -k$(( $N + 1 )) -nr .plot.tmp1 | while read i ; do
+        echo "$C $i" >> .plot.tmp
+        C=$(( $C + 1 ))
+    done
+
+    WIDTH=$(( 16 * `cat .plot.cols | wc -l` ))
+    if [ $WIDTH -lt 640 ] ; then
+        WIDTH=640
+    fi
+
+    # file name part for the filter: '/' becomes '_' and a '-' is appended;
+    # empty without a filter.  Worked out once, it is also needed for the
+    # gif conversion after the loop.
+    _FILTER=`echo "$FILTER" | sed "s|/|_|g"`
+    if [ ! -z "$_FILTER" ] ; then
+        _FILTER=$_FILTER-
+    fi
+
+    # plot each snapshot in turn
+
+    C=1
+    while [ $C -le $N ]; do
+
+        BIGGEST=`cat .biggest`
+
+        # snapshot C lives in column C + 2 (after idx and dir)
+        TOT=`cat .plot.tmp |tail -n+2 | \
+            cut -d' ' -f$(( $C + 2 )) |paste -sd+ | bc`
+
+        PL="'.plot.tmp' using 1:$(( $C + 2 )):xtic(2) \
+            w boxes lc rgb \"#0000ff\" "
+
+        Q="`cat .plot.tmp | head -n1 | \
+            cut -d' ' -f$(( $C + 2 ))`"
+
+        _TOT="Total LOC $Q: $TOT"
+
+        echo $_TOT
+
+        # the single comparison snapshot is the column after the N own
+        # ones; it is drawn first, in red, behind the blue boxes
+        if [ ! -z "$SNC" ] ; then
+            PL="'.plot.tmp' using 1:$(( $N + 3 )):xtic(2) \
+                w boxes lc rgb \"#ff0000\",$PL"
+            TOTC="`cat .plot.tmp |tail -n+2 | \
+                cut -d' ' -f$(( $N + 3 )) |paste -sd+ | bc`"
+            Q="`cat .plot.tmp | head -n1 | \
+                cut -d' ' -f$(( $N + 3 ))`"
+
+            _TOT="Total LOC $Q: $TOTC\n$_TOT"
+        fi
+
+        issue_plot_file_dist \
+            "$PL" $BIGGEST \
+            "growth-$R-dist-$_FILTER`printf %04d $C`.png" \
+            "$R patch distribution (LOC) $3" \
+            $WIDTH,480 "$_TOT"
+        C=$(( $C + 1 ))
+    done
+
+    >&2 echo "Converting gif"
+    # the frames carry the filter part in their names, so the pattern must
+    # too; only the four digit counter is left as a glob
+    convert -delay 50 -loop 0 "growth-$R-dist-$_FILTER"????.png \
+        "growth-$R-dist.gif"
+
+    exit 0
+fi
+
+#
+# Tagged rebase tree mode
+#
+# $1: basis branch, $3: regexp selecting the tags to analyse
+# Adds a row to runs, reports its run number on stderr, then records one
+# snapshot per matching tag with make_stat.
+#
+
+if [ "$2" = "--tags" ] ; then
+    [ -n "$3" ] || {
+        echo "Need tag regexp filter with --tags" >&2
+        exit 1
+    }
+
+    sq3 "insert into runs ( \
+ run_key, basis_hash, comp_hash, schema_ver) \
+ values (NULL, \"$1\", \"$3\", \"$SCHEMA_VER\"); \
+ "
+    # key that sqlite assigned to the run just inserted
+    RUNKEY=$(sq3 "select seq from sqlite_sequence where \
+ name=\"runs\"")
+
+    echo "tags mode -- run $RUNKEY" >&2
+
+    index=0
+    for tag in $(git tag | grep "$3") ; do
+        echo "$tag" >&2
+        # empty third argument: let make_stat discover the basis point
+        make_stat "$1" "$tag" "" "$index" "$RUNKEY"
+        index=$(( index + 1 ))
+    done
+
+    exit 0
+fi
+
+# Any other argument combination ends here without doing anything.
+# This exit also makes everything below unreachable.
+exit 0
+
+# ---> untested
+
+#
+# History tree mode
+#
+# Unreachable because of the exit above.  As written it would record one
+# snapshot per commit between the basis point and the comparison ref.
+#
+
+# $2: ref to study, defaulting to the branch currently checked out
+if [ ! -z "$2" ] ; then
+ COMP=$2
+else
+ COMP=`git rev-parse --abbrev-ref HEAD`
+fi
+
+# leaves the basis point in BP
+basis_point $1 $COMP
+
+# abbreviated hashes of the commits after BP, oldest first
+git log $BP.. --oneline | \
+ cut -d' ' -f1 | \
+ tac > .patches.tmp
+
+TODO=`wc -l .patches.tmp | cut -d' ' -f 1`
+C=1
+
+# NOTE(review): make_stat is called with three arguments here although it
+# uses $5 as the run key in its SQL, and no row is added to runs first -
+# this needs finishing before the exit above is removed.
+cat .patches.tmp | while read i ; do
+ >&2 echo "Patch $C/$TODO"
+ make_stat $1 $i $BP
+ C=$(( $C + 1 ))
+done
+
+exit 0
+