summaryrefslogtreecommitdiff
path: root/tcwg-cleanup-stale-results.sh
blob: 413de2354a3aa64412dee0fc3b64483b71f7ae1a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/bin/bash

# Clean up benchmark results directories ("$results_top-*") that are neither
# marked as used by a commit on the scanned base-artifacts branches nor
# recently modified.  With dryrun=true the removals are only reported.

set -e -u -f -o pipefail

scripts="$(dirname "$0")"
# shellcheck source=jenkins-helpers.sh
. "$scripts/jenkins-helpers.sh"

# Presumably turns the command-line arguments into shell variables; the
# helper is defined in jenkins-helpers.sh, which is not visible here.
convert_args_to_variables "$@"

# Fall back to defaults for every setting that is still unset at this point
# (a setting that is set but empty is kept as is).
: "${days=30}"
: "${refs_url=https://git.linaro.org/toolchain/ci/base-artifacts}"
: "${refs_pattern=refs/heads/linaro-local/ci/tcwg_bmk*}"
: "${results_top=/home/tcwg-benchmark/results}"
: "${dryrun=true}"
: "${verbose=true}"

# $verbose holds the name of a command ("true" or "false") and is run as such.
if $verbose; then
    set -o xtrace
fi

# Delete "used_by" markers older than $days days.
# The script runs with "set -f", so globbing is re-enabled only inside this
# subshell.  $results_top is quoted so that it is used as one literal prefix
# (no word splitting) and only the "-*" suffix is expanded.
(set +f; find "$results_top"-* -name used_by -mtime "+$days" -delete)

# Initialize base-artifacts repo (by cloning its "empty" branch).
refs_repo=$(basename "$refs_url" .git)
clone_or_update_repo_no_checkout "$refs_repo" "$refs_url" none empty origin
git -C "$refs_repo" reset --hard

# List the remote branches to scan.  This is done up front rather than
# inside a process substitution, because the exit status of a process
# substitution is not checked: a failed "git ls-remote" would silently yield
# an empty list, no "used_by" markers would be refreshed, and results that
# are still referenced could later be treated as unused.
if ! remote_refs=$(git ls-remote "$refs_url" "$refs_pattern"); then
    echo "ERROR: cannot list $refs_pattern in $refs_url" >&2
    exit 1
fi

# Walk through all commits of all tcwg_bmk* branches and mark results
# referenced in those commits with "used_by" file.
while IFS= read -r ref; do
    git -C "$refs_repo" fetch origin "$ref" >/dev/null 2>&1
    git -C "$refs_repo" reset --hard FETCH_HEAD >/dev/null 2>&1
    depth=0
    # Walk all commits of just-fetched branch (i.e., until HEAD^ can't
    # be parsed by git rev-parse).
    while true; do
	# Each of these files, when present, holds the suffix of a results
	# directory name ("$results_top-<contents>").
	for results_id in "$refs_repo/results_id" \
			      "$refs_repo/results_id-1" \
			      "$refs_repo/results_id-2"; do
	    if [ -f "$results_id" ]; then
		results_dir="$results_top-$(cat "$results_id")"
		# Record which commit (branch~depth) references the results.
		used_by="$refs_url/$ref~$depth"
		if [ ! -d "$results_dir" ]; then
		    echo "WARNING: $used_by is missing $results_dir"
		else
		    echo "$used_by" > "$results_dir/used_by"
		fi
	    fi
	done
	if ! git -C "$refs_repo" rev-parse HEAD^ >/dev/null 2>&1; then
	    break
	fi
	# Step the work tree back to the (first) parent commit.
	git -C "$refs_repo" reset --hard HEAD^ >/dev/null 2>&1
	depth=$((depth+1))
    done
# "NF >= 2" skips the single empty line that the here-string produces when
# no refs matched.
done < <(awk 'NF >= 2 { print $2 }' <<<"$remote_refs")

# Return 0 if a "used_by" marker file exists in an ancestor directory of $1,
# and 1 otherwise.  The walk starts at the parent of $1 and ends after
# checking $2.  It also ends when dirname stops making progress ("/" for
# absolute paths, "." for relative ones), so a $1 that is not located under
# $2 cannot make it loop forever.
has_used_by_ancestor ()
{
    local dir="$1" stop="$2" up
    while [ x"$dir" != x"$stop" ]; do
	up=$(dirname "$dir")
	if [ -f "$up/used_by" ]; then
	    return 0
	fi
	if [ x"$up" = x"$dir" ]; then
	    # Reached the top without ever meeting $stop.
	    break
	fi
	dir="$up"
    done
    return 1
}

# Directory that contains the "$results_top-*" directories; the ancestor
# walk stops here (instead of at a hard-coded path) so that it also works
# when results_top is overridden.
results_parent=$(dirname "$results_top")

# Visit every directory under "$results_top-*" and remove the unused ones.
while IFS= read -r -d '' dir; do
    # Skip already-deleted dirs (e.g., $dir's parent was deleted).
    if [ ! -d "$dir" ]; then
	continue
    fi

    # Don't delete "used_by" dirs and dirs that have recent files
    # (i.e., "-mtime -$days").  E.g., in-progress benchmark might have uploaded
    # partial results.  One match is enough, so find quits after the first.
    if [ x"$(find "$dir" \( -name used_by -o -mtime "-$days" \) -print -quit)" != x"" ]; then
	continue
    fi

    # Don't delete subdirectories of a "used_by" parent.
    if has_used_by_ancestor "$dir" "$results_parent"; then
	continue
    fi

    echo "DELETE: $dir is not used"
    # $dryrun holds the name of a command ("true" or "false").
    if $dryrun; then
	echo "DRYRUN: rm -rf $dir"
    else
	rm -rf "$dir"
    fi
# Globbing is off script-wide ("set -f"); enable it only for this find.
done < <(set +f; find "$results_top"-* -type d -print0)