summaryrefslogtreecommitdiff
path: root/tcwg-cleanup-stale-results.sh
blob: d47a57e5c80a5ce8ae86f55195fd2ec9e6b84267 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/bin/bash

# Clean up stale results stored under $results_top: optionally "git gc" the
# result repositories and remove annex files that are no longer referenced.
#
# Settings (each keeps a value that is already set, e.g. by
# convert_args_to_variables, which is defined in jenkins-helpers.sh and
# presumably maps the command-line arguments to variables -- confirm there):
#   results_top    top directory of the results     (default: $HOME/base-artifacts)
#   verbose        command run as-is; "true" enables "set -x"      (default: false)
#   cleanup_gc     "false" skips the gc step, "dryrun" only prints (default: true)
#   cleanup_annex  "false" skips the annex step, "dryrun" only prints
#                                                                   (default: true)
#   WORKSPACE      working directory; a temporary one is created (and removed
#                  at the end of the script) when it is unset or empty

set -euf -o pipefail

scripts=$(dirname "$0")
# shellcheck source=jenkins-helpers.sh
. "$scripts/jenkins-helpers.sh"

convert_args_to_variables "$@"

results_top="${results_top-$HOME/base-artifacts}"
verbose="${verbose-false}"
cleanup_gc="${cleanup_gc-true}"
cleanup_annex="${cleanup_annex-true}"

# Host prefix ("<host>:") stripped from the entries of the bmk-data files.
current_host="bkp-01.tcwglab"

if $verbose; then
    set -x
fi

# Remove the temporary workspace.  The script runs this through the unquoted
# expansion of $rm_workspace; using a function (instead of a flat
# "rm -rf <path>" string) keeps the path a single, quoted argument even when
# it contains whitespace, and ${WORKSPACE:?} refuses to run on an empty path.
cleanup_stale_results_rm_workspace()
{
    rm -rf -- "${WORKSPACE:?}"
}

WORKSPACE="${WORKSPACE-}"
if [ "$WORKSPACE" = "" ]; then
   WORKSPACE=$(mktemp -d)
   rm_workspace="cleanup_stale_results_rm_workspace"
else
   # A caller-provided workspace is never removed.
   rm_workspace="true"
fi

# Run "git gc" on every "*.git" directory located exactly two levels below
# $results_top.
#
# Globals read:
#   results_top  directory searched for the repositories
#   cleanup_gc   "dryrun" only prints the command; any other value runs it
# Outputs: one progress line per repository on stdout.
# Returns: non-zero if a "git gc" fails (the last command's status).
perform_cleanup_gc()
{
    local gitdir

    ### CLEANUP THE GIT REPOSITORIES
    echo "=== CLEANUP THE GIT REPOSITORIES"
    # IFS= and -r keep each path exactly as find printed it; the quoting
    # below keeps paths containing whitespace in one piece.
    while IFS= read -r gitdir; do
        if [ "$cleanup_gc" = "dryrun" ]; then
            echo "DRYRUN: git -C $gitdir gc"
        else
            echo "# git -C $gitdir gc"
            git -C "$gitdir" gc
        fi
    done < <(find "$results_top" -mindepth 2 -maxdepth 2 -type d -name '*.git')
}

# Remove the files under $results_top/annex that are neither referenced by the
# benchmark history of a tcwg_bmk-* repository nor modified within the last
# 30 days.
#
# Globals read:
#   WORKSPACE      directory receiving the intermediate list_annex.*.txt files
#   results_top    directory holding annex/ and the <dir>/<project>.git repos
#   current_host   "<host>:" prefix stripped from the bmk-data entries
#   cleanup_annex  "dryrun" only prints the removals; any other value deletes
# Uses assert_with_msg and get_git_history, which are not defined in this
# script (presumably provided by jenkins-helpers.sh -- confirm there).
# Side effects: (re)creates a "base-artifacts" clone in the current directory.
# Outputs: progress and summary lines on stdout.
perform_cleanup_annex()
{
    local existing_annex_file used_annex_file recent_annex_file
    local missing_annex useless_annex
    local gitdir ci_project_config ref br file
    local -a all_bmk_datas

    ### CLEANUP THE ANNEX FILES
    echo "=== CLEANUP THE ANNEX FILES"
    existing_annex_file=$WORKSPACE/list_annex.existing.txt
    used_annex_file=$WORKSPACE/list_annex.used.txt
    recent_annex_file=$WORKSPACE/list_annex.recent.txt
    missing_annex=$WORKSPACE/list_annex.referenced_but_not_exist.txt
    useless_annex=$WORKSPACE/list_annex.exist_but_not_referenced.txt
    rm -f "$used_annex_file" "$existing_annex_file" "$recent_annex_file"

    # List all existing annex
    echo "# existing annex results"
    assert_with_msg "ERROR: $results_top/annex doesnot exist" [ -d "$results_top/annex" ]
    find "$results_top/annex/" -type f > "$existing_annex_file"

    # Byte-wise ordering: the list is compared with comm further down, which
    # needs both inputs sorted under the same collation.
    LC_ALL=C sort -u -o "$existing_annex_file" "$existing_annex_file"

    echo " => $(wc -l < "$existing_annex_file") existing annex"

    # List all used annex
    echo "# referenced annex results"
    # The repository list is read on fd 3 so that no command in the loop body
    # can consume it through stdin.
    while IFS= read -r -u 3 gitdir; do
        ci_project_config=${gitdir#"$results_top"/}
        ci_project_config=${ci_project_config%.git}

        # annex are tcwg_bmk only
        if ! [[ $ci_project_config =~ tcwg_bmk- ]]; then
            continue
        fi

        rm -rf base-artifacts
        git clone -q --reference "$gitdir" "$gitdir" \
            --branch "linaro-local/ci/$ci_project_config" \
            base-artifacts

        # for-each-ref prints one full ref name per line (ref names cannot
        # contain whitespace or glob characters, so word splitting is safe),
        # unlike "git branch -r" whose "origin/HEAD -> ..." line splits into
        # words that are not branches.
        # shellcheck disable=SC2013
        for ref in $(git -C base-artifacts for-each-ref --format='%(refname)' refs/remotes); do
            br=${ref#refs/remotes/}
            # <remote>/HEAD is only an alias of another remote branch.
            if [[ $br =~ ^[^/]+/HEAD$ ]]; then
                continue
            fi

            git -C base-artifacts checkout -q "$br"
            # The code below treats the first line as a scratch path to delete
            # and every following line as a bmk-data file to read; presumably
            # that is the output contract of get_git_history -- confirm there.
            readarray -t all_bmk_datas < <(set +x; get_git_history 0 base-artifacts "annex/bmk-data")
            # Without the guard, "cat" with no operand would read stdin.  The
            # redirection sits on the "if" so the list file is created even
            # when there is nothing to append.
            if (( ${#all_bmk_datas[@]} > 1 )); then
                cat "${all_bmk_datas[@]:1}" | sed -e "s|^$current_host:||"
            fi >> "$used_annex_file"
            printf " => %s referenced annex  -- %-200s\n" \
                "$(wc -l < "$used_annex_file")" "[processed $ci_project_config ($br)]"
            if (( ${#all_bmk_datas[@]} > 0 )); then
                rm -rf -- "${all_bmk_datas[0]}"
            fi
        done
    done 3< <(find "$results_top" -mindepth 2 -maxdepth 2 -type d -name '*.git')

    echo ""
    echo " => $(wc -l < "$used_annex_file") referenced annex"

    # recent annex
    find "$results_top/annex/" -type f -mtime -30 > "$recent_annex_file"
    echo " => $(wc -l < "$recent_annex_file") recent annex (less than 1-month old)"

    # include recent annex in the referenced ones
    # (fails, and so stops the cleanup, if no referenced list was produced)
    LC_ALL=C sort -u -o "$used_annex_file" "$used_annex_file" "$recent_annex_file"

    ### compare and remove useless annex
    # comm on the two sorted lists: -13 keeps the lines found only in the
    # referenced list, -23 the lines found only in the existing list.  It
    # exits 0 whether or not the lists differ, so pipefail stays enabled.
    LC_ALL=C comm -13 "$existing_annex_file" "$used_annex_file" > "$missing_annex"
    LC_ALL=C comm -23 "$existing_annex_file" "$used_annex_file" > "$useless_annex"

    if [ -s "$missing_annex" ]; then
        echo "WARNING: these annex are referenced, but not exists"
        sed -e 's|^|    |' "$missing_annex"
    else
        echo "NOTE: All referenced annex files exist"
    fi

    if [ -s "$useless_annex" ]; then
        echo "REMOVING: About to remove $(wc -l < "$useless_annex") files."
    else
        echo "NOTE: No annex file to remove."
    fi

    # One path per line, read verbatim: a path containing whitespace must
    # reach rm as a single argument.
    while IFS= read -r file; do
        if [ "$cleanup_annex" = "dryrun" ]; then
            echo "DRYRUN: rm -rf $file"
        else
            rm -rf -- "$file"
        fi
    done < "$useless_annex"
}

# Main sequence.  Everything below uses paths relative to the workspace
# (the reserve file here, the scratch clone made by the annex cleanup).
cd "$WORKSPACE"

# free 10Gb disk space. This will be necessary to cleanup the git repositories
rm -f empty-10Gbfile.tmp

# Each step runs unless its switch is exactly "false" ("dryrun" is handled
# inside the step itself).
if [ "$cleanup_gc" != "false" ]; then
    perform_cleanup_gc
fi

if [ "$cleanup_annex" != "false" ]; then
    perform_cleanup_annex
fi

# Create a big file to reserve 10Gb for next cleanup.
# rm_workspace is "true" only when the caller supplied WORKSPACE.  Otherwise
# the workspace is a temporary directory deleted on the next line, so writing
# the reserve there would be discarded at once (and a failing dd would abort
# the script before the temporary directory is removed).
if [ "$rm_workspace" = "true" ]; then
    dd if=/dev/zero of=empty-10Gbfile.tmp bs=1G count=10
fi

$rm_workspace