diff options
author | Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> | 2020-06-28 16:18:29 +0000 |
---|---|---|
committer | Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> | 2020-09-07 10:26:36 +0000 |
commit | a74435caae3889c4e1dd98517a7c031cc9e3520d (patch) | |
tree | 76398e221da5c882c76f98048fe6756e6a3fd3c8 /tcwg-cleanup-stale-results.sh | |
parent | 5c8768c199441a84b942d9993d94a06262abd85b (diff) |
tcwg-cleanup-stale-results.sh: New script to delete stale benchmarking results
... on bkp-01.tcwglab. This script parses the toolchain/ci/base-artifacts.git
repo and removes benchmarking results that are
- NOT referenced in any linaro-local/ci/tcwg_bmk* branch and
- older than 30 days.
Change-Id: I5b29558f646ef2bfe58fb0fdb822668593f7db5b
Diffstat (limited to 'tcwg-cleanup-stale-results.sh')
-rwxr-xr-x | tcwg-cleanup-stale-results.sh | 92 |
1 files changed, 92 insertions, 0 deletions
#!/bin/bash

# tcwg-cleanup-stale-results.sh: delete stale benchmarking results.
#
# Phase 1: expire "used_by" marker files older than $days days.
# Phase 2: walk every commit of every branch matching $refs_pattern in
#          $refs_url and drop a fresh "used_by" marker into each results
#          directory named by the commit's results_id* files.
# Phase 3: remove every directory under $results_top-* that
#          - has no "used_by" marker in itself, below itself or above itself,
#          - and contains nothing modified within the last $days days.
#
# Variables (each can be pre-set; convert_args_to_variables comes from
# jenkins-helpers.sh and presumably maps command-line arguments onto these
# names -- confirm against that helper):
#   days          age threshold in days                    (default: 30)
#   refs_url      git repo holding the benchmark branches
#   refs_pattern  ref pattern handed to "git ls-remote"
#   results_top   path prefix; results live in "$results_top-*"
#   dryrun        "true" prints what would be removed      (default: true)
#   verbose       "true" enables "set -x" tracing          (default: true)

set -euf -o pipefail

scripts=$(dirname "$0")
# shellcheck source=jenkins-helpers.sh
. "$scripts/jenkins-helpers.sh"

convert_args_to_variables "$@"

days="${days-30}"
refs_url="${refs_url-https://git.linaro.org/toolchain/ci/base-artifacts}"
refs_pattern="${refs_pattern-refs/heads/linaro-local/ci/tcwg_bmk*}"
results_top="${results_top-/home/tcwg-benchmark/results}"
dryrun="${dryrun-true}"
verbose="${verbose-true}"

if $verbose; then
    set -x
fi

# Delete "used_by" markers older than $days days.
# Globbing is disabled globally (set -f), so re-enable it in a subshell just
# for the "$results_top"-* expansion.
# NOTE: this is not guarded by $dryrun -- markers are expired in dry-run mode
# too; they are re-created below for every result that is still referenced.
(set +f; find "$results_top"-* -name used_by -mtime "+$days" -delete)

# Initialize base-artifacts repo (by cloning its "empty" branch).
refs_repo=$(basename "$refs_url" .git)
clone_or_update_repo_no_checkout "$refs_repo" "$refs_url" none empty origin
git -C "$refs_repo" reset --hard

# Fetch the branch list up front. Inside a process substitution a failing
# "git ls-remote" would go unnoticed, no result would get marked as used, and
# the deletion phase below would then remove results that are still
# referenced. As a plain assignment, set -e / pipefail abort the script.
refs=$(git ls-remote "$refs_url" "$refs_pattern" | awk '{ print $2 }')
if [ -z "$refs" ]; then
    echo "ERROR: no refs in $refs_url match $refs_pattern; not deleting anything" >&2
    exit 1
fi

# Walk through all commits of all tcwg_bmk* branches and mark results
# referenced in those commits with "used_by" file.
while IFS= read -r ref; do
    git -C "$refs_repo" fetch origin "$ref" >/dev/null 2>&1
    git -C "$refs_repo" reset --hard FETCH_HEAD >/dev/null 2>&1
    depth=0
    # Walk all commits of just-fetched branch (i.e., until HEAD^ can't
    # be parsed by git rev-parse).
    while true; do
        for results_id in "$refs_repo/results_id" \
                              "$refs_repo/results_id-1" \
                              "$refs_repo/results_id-2"; do
            if [ -f "$results_id" ]; then
                results_dir="$results_top-$(cat "$results_id")"
                # Record which commit keeps this result alive.
                used_by="$refs_url/$ref~$depth"
                if [ ! -d "$results_dir" ]; then
                    echo "WARNING: $used_by is missing $results_dir"
                else
                    echo "$used_by" > "$results_dir/used_by"
                fi
            fi
        done
        if ! git -C "$refs_repo" rev-parse HEAD^ >/dev/null 2>&1; then
            break
        fi
        git -C "$refs_repo" reset --hard HEAD^ >/dev/null 2>&1
        depth=$((depth+1))
    done
done <<< "$refs"

# Directory at which the upward search for "used_by" markers stops.
# Derived from $results_top so that overriding results_top keeps working
# (with the default it is /home/tcwg-benchmark).
results_root=$(dirname "$results_top")

while IFS= read -r -d '' dir; do
    # Skip already-deleted dirs (e.g., $dir's parent was deleted).
    if [ ! -d "$dir" ]; then
        continue
    fi

    # Don't delete "used_by" dirs and dirs that have recent files
    # (i.e., "-mtime -$days"). E.g., in-progress benchmark might have uploaded
    # partial results. "-print -quit" stops at the first hit.
    if [ -n "$(find "$dir" \( -name used_by -o -mtime "-$days" \) \
                    -print -quit)" ]; then
        continue
    fi

    # Don't delete subdirectories of a "used_by" parent.
    # Besides $results_root the walk also stops at "/" and ".", where
    # dirname reaches a fixed point; otherwise it would never terminate for
    # a $dir that is not located below $results_root.
    parent="$dir"
    used=false
    while [ "$parent" != "$results_root" ] \
              && [ "$parent" != "/" ] \
              && [ "$parent" != "." ] \
              && ! $used; do
        parent=$(dirname "$parent")
        if [ -f "$parent/used_by" ]; then
            used=true
        fi
    done
    if $used; then
        continue
    fi

    echo "DELETE: $dir is not used"
    if $dryrun; then
        echo "DRYRUN: rm -rf $dir"
    else
        rm -rf -- "$dir"
    fi
done < <(set +f; find "$results_top"-* -type d -print0)