summaryrefslogtreecommitdiff
path: root/tcwg-cleanup-stale-results.sh
diff options
context:
space:
mode:
authorMaxim Kuvyrkov <maxim.kuvyrkov@linaro.org>2020-06-28 16:18:29 +0000
committerMaxim Kuvyrkov <maxim.kuvyrkov@linaro.org>2020-09-07 10:26:36 +0000
commita74435caae3889c4e1dd98517a7c031cc9e3520d (patch)
tree76398e221da5c882c76f98048fe6756e6a3fd3c8 /tcwg-cleanup-stale-results.sh
parent5c8768c199441a84b942d9993d94a06262abd85b (diff)
tcwg-cleanup-stale-results.sh: New script to delete stale benchmarking results
... on bkp-01.tcwglab. This script parses the toolchain/ci/base-artifacts.git repo and removes benchmarking results that are (1) NOT referenced in any linaro-local/ci/tcwg_bmk* branch and (2) older than 30 days. Change-Id: I5b29558f646ef2bfe58fb0fdb822668593f7db5b
Diffstat (limited to 'tcwg-cleanup-stale-results.sh')
-rwxr-xr-xtcwg-cleanup-stale-results.sh92
1 files changed, 92 insertions, 0 deletions
diff --git a/tcwg-cleanup-stale-results.sh b/tcwg-cleanup-stale-results.sh
new file mode 100755
index 00000000..413de235
--- /dev/null
+++ b/tcwg-cleanup-stale-results.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+set -euf -o pipefail
+
+scripts=$(dirname "$0")
+# shellcheck source=jenkins-helpers.sh
+. "$scripts/jenkins-helpers.sh"
+
+convert_args_to_variables "$@"
+
+days="${days-30}"
+refs_url="${refs_url-https://git.linaro.org/toolchain/ci/base-artifacts}"
+refs_pattern="${refs_pattern-refs/heads/linaro-local/ci/tcwg_bmk*}"
+results_top="${results_top-/home/tcwg-benchmark/results}"
+dryrun="${dryrun-true}"
+verbose="${verbose-true}"
+
+if $verbose; then
+ set -x
+fi
+
+# Delete "used_by" markers older than $days days.
+(set +f; find $results_top-* -name used_by -mtime "+$days" -delete)
+
+# Initialize base-artifacts repo (by cloning its "empty" branch).
+refs_repo=$(basename "$refs_url" .git)
+clone_or_update_repo_no_checkout "$refs_repo" "$refs_url" none empty origin
+git -C "$refs_repo" reset --hard
+
+# Walk through all commits of all tcwg_bmk* branches and mark results
+# referenced in those commits with a "used_by" file.
+#
+# The branch list is captured before the loop on purpose: the exit status
+# of a process substitution is invisible to "set -e", so a failing
+# "git ls-remote" would otherwise look like "no branches", no marker would
+# be refreshed, and the cleanup pass could delete results that are still
+# referenced.  As a plain assignment, a failure aborts the script.
+refs=$(git ls-remote "$refs_url" "$refs_pattern" | awk '{ print $2 }')
+
+while IFS= read -r ref; do
+    # A here-string yields a single empty line when no branch matched.
+    if [ -z "$ref" ]; then
+        continue
+    fi
+
+    git -C "$refs_repo" fetch origin "$ref" >/dev/null 2>&1
+    git -C "$refs_repo" reset --hard FETCH_HEAD >/dev/null 2>&1
+    depth=0
+    # Walk all commits of just-fetched branch (i.e., until HEAD^ can't
+    # be parsed by git rev-parse).
+    while true; do
+        # A commit may name up to three result sets.
+        for results_id in "$refs_repo/results_id" \
+                          "$refs_repo/results_id-1" \
+                          "$refs_repo/results_id-2"; do
+            if [ -f "$results_id" ]; then
+                results_dir="$results_top-$(cat "$results_id")"
+                # Record which commit keeps this result alive.
+                used_by="$refs_url/$ref~$depth"
+                if [ ! -d "$results_dir" ]; then
+                    echo "WARNING: $used_by is missing $results_dir"
+                else
+                    echo "$used_by" > "$results_dir/used_by"
+                fi
+            fi
+        done
+        if ! git -C "$refs_repo" rev-parse HEAD^ >/dev/null 2>&1; then
+            break
+        fi
+        # Step the work tree back to the parent commit.
+        git -C "$refs_repo" reset --hard HEAD^ >/dev/null 2>&1
+        depth=$((depth + 1))
+    done
+done <<< "$refs"
+
+while IFS= read -r -d '' dir; do
+ # Skip already-deleted dirs (e.g., $dir's parent was deleted).
+ if [ ! -d "$dir" ]; then
+ continue
+ fi
+
+ # Don't delete "used_by" dirs and dirs that have recent files
+ # (i.e., "-mtime -$days"). E.g., in-progress benchmark might have uploaded
+ # partial results.
+ if [ x"$(find "$dir" -name used_by -o -mtime "-$days" | head -n1)" != x"" ]; then
+ continue
+ fi
+
+ # Don't delete subdirectories of a "used_by" parent.
+ parent="$dir"
+ used=false
+ while [ x"$parent" != x"/home/tcwg-benchmark" ] && ! $used; do
+ parent=$(dirname "$parent")
+ if [ -f "$parent/used_by" ]; then
+ used=true
+ fi
+ done
+ if $used; then
+ continue
+ fi
+
+ echo "DELETE: $dir is not used"
+ if $dryrun; then
+ echo "DRYRUN: rm -rf $dir"
+ else
+ rm -rf "$dir"
+ fi
+done < <(set +f; find $results_top-* -type d -print0)