diff options
Diffstat (limited to 'tcwg-cleanup-stale-containers.sh')
-rwxr-xr-x | tcwg-cleanup-stale-containers.sh | 37 |
1 file changed, 29 insertions, 8 deletions
diff --git a/tcwg-cleanup-stale-containers.sh b/tcwg-cleanup-stale-containers.sh
index cc748c5c..d5d999d9 100755
--- a/tcwg-cleanup-stale-containers.sh
+++ b/tcwg-cleanup-stale-containers.sh
@@ -65,7 +65,11 @@ do_cleanup_containers ()
     local hours="$1"
     local docker_ps_opts="$2"
     local action="$3"
+    local action_msg="$4"
+    local msg="$5"
+
     local cleanup_containers=true
+    local dryrun_msg=""
     local only_jenkins_containers=true
 
     if [ "$hours" -eq "0" ]; then
@@ -73,8 +77,10 @@ do_cleanup_containers ()
     elif [ "$hours" -lt "0" ]; then
         hours="$((0-$hours))"
         cleanup_containers=false
+        dryrun_msg=" (DRYRUN)"
     fi
 
+    echo "$msg (more than ${hours}h)${dryrun_msg}"
     echo "Container report before:"
     $DOCKER ps $docker_ps_opts
 
@@ -109,10 +115,10 @@ do_cleanup_containers ()
     local res
     local status="0"
     if [ ${#rm_containers[@]} != 0 ]; then
-        echo "Removing containers: ${rm_containers[@]}"
+        echo "Containers to ${action_msg}: ${rm_containers[@]}"
         if $cleanup_containers; then
             for container in "${rm_containers[@]}"; do
-                echo "Removing container $container"
+                echo "Container to ${action_msg}: $container"
                 $DOCKER $action $container &
                 res=0; wait $! || res=$?
                 if [ $res != 0 ]; then
@@ -121,7 +127,7 @@ do_cleanup_containers ()
                 fi
             done
         else
-            echo "DRY_RUN: NOT REMOVING CONTAINERS"
+            echo "DRY_RUN: NOT ACTING ON CONTAINERS"
             echo "Increasing exit code to indicate stale containers"
             status="1"
         fi
@@ -129,19 +135,19 @@ do_cleanup_containers ()
         echo "Containers report after:"
         $DOCKER ps $docker_ps_opts
     else
-        echo "Found no container to remove"
+        echo "Found no container to ${action_msg}"
     fi
 
     exit $status
 }
 
 res="0"
-do_cleanup_containers $cleanup_running_hours "" "stop" &
+do_cleanup_containers $cleanup_running_hours "" "stop" "stop" "Stopping long-running containers" &
 wait $! || res=$?
 status=$res
 
 res="0"
-do_cleanup_containers $cleanup_stopped_hours "-a" "rm -fv" &
+do_cleanup_containers $cleanup_stopped_hours "-a" "rm -fv" "remove" "Removing containers stopped long ago" &
 wait $! || res=$?
 status=$(($status|(2*$res)))
 
@@ -200,9 +206,24 @@ fi
 if [ "$cleanup_ssh_agent_hours" -gt "0" ]; then
     res=0; killall --older-than ${cleanup_ssh_agent_hours}h -u $USER ssh-agent &
     wait $! || res=$?
+    # Killall can fail for several reasons:
+    # return-code 127: command not found
+    # return-code 1: in general means no ssh-agent process was found
+    # In the first case, we want the cleanup job to fail, so that we
+    # know we need to install killall
+    # The second case is OK, unless killall fails for another
+    # reason. Assume OK for now.
     if [ $res != 0 ]; then
-        echo "WARNING: could not kill stale ssh-agent processes"
-        status=$(($status|16))
+        case $res in
+            127)
+                echo "WARNING: could not kill stale ssh-agent processes (killall command not found)"
+                echo "Increasing exit code to indicate killall is missing"
+                status=$(($status|16))
+                ;;
+            1)
+                echo "WARNING: could not kill stale ssh-agent processes or there was no stale ssh-agent older than ${cleanup_ssh_agent_hours}h"
+                ;;
+        esac
     fi
 fi
 