summaryrefslogtreecommitdiff
path: root/tcwg-start-container.sh
blob: 7c02a5505af97afc71160c8cc2bbd0357ce20b16 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/bin/bash

# Start (or refresh) the docker container named $container from $image.
#
# Named arguments are turned into shell variables by
# convert_args_to_variables from jenkins-helpers.sh (presumably
# "--name value" pairs -- confirm against the helper).  Positional
# arguments that remain after the shift below are forwarded to the
# image's start.sh.
#
# Variables:
#   container           (required) name of the container to (re)start
#   image               (required) docker image to start the container from
#   keep_existing       true | false | keep_if_same_image (default: true)
#   verbose             true | false; "true" enables "set -x" (default: true)
#   additional_options  passed through to start.sh (default: empty)
#   test_docker         true | false; check docker health first (default: false)

# -e: exit on error, -u: unset variables are errors, -f: no globbing,
# pipefail: a pipeline fails if any of its stages fails.
set -euf -o pipefail

# $0 is quoted so that a script path containing whitespace still resolves.
# shellcheck source=jenkins-helpers.sh
. "$(dirname "$0")"/jenkins-helpers.sh
convert_args_to_variables "$@"
shift "$SHIFT_CONVERTED_ARGS"

# NOTE(review): obligatory_variables presumably aborts when one of the
# listed variables is missing -- confirm in jenkins-helpers.sh.
obligatory_variables container image
# Declaring without a value leaves the variables untouched; presumably
# this is here to make the names known to shellcheck.
declare container image

# "${var-default}" applies the default only when the variable is unset;
# an explicitly empty value is kept as is.
keep_existing="${keep_existing-true}"
verbose="${verbose-true}"
additional_options="${additional_options-}"
test_docker="${test_docker-false}"

# $verbose is executed as a command, so it must be "true" or "false".
if $verbose; then set -x; fi

# Check that docker can start a container.
# Globals: image (read) -- the image to pull and run.
# Outputs: a "NOTE: ..." line on stdout when all steps succeeded.
# Returns: 0 when every step succeeded, otherwise the exit status of the
# first failing step.  Failures are propagated explicitly so that the
# result is reliable even when the function is called in a context where
# "set -e" is ignored (e.g., as an "if" condition).
test_docker()
{
    /root/docker-wrapper ps || return
    /root/docker-wrapper maybepull "$image" || return
    # Run a throw-away container (--rm) with /bin/sh as its entrypoint.
    /root/docker-wrapper run --rm --entrypoint=/bin/sh "$image" || return
    echo "NOTE: Docker seems to be OK"
}

# Make sure docker is usable (optionally) and that $image is available
# locally.  /root/docker-wrapper is used when it is present.
if [ -f /root/docker-wrapper ]; then
    # The fact that we are here implies that we are running as root.
    if $test_docker; then
	# The check runs as a background job and its status is collected
	# with "wait"; "set -e" remains in effect inside the job, which
	# would not be the case if the function were the "if" condition.
	test_docker &
	if ! wait "$!"; then
	    # Capture the output before matching instead of piping into
	    # "grep -q": grep -q exits on the first match, the producer may
	    # then be killed by SIGPIPE, and under "pipefail" that makes
	    # the condition false although the driver is devicemapper.
	    # As before, a failing "info" command skips the branch.
	    if docker_info=$(/root/docker-wrapper info) \
		    && [[ "$docker_info" == *devicemapper* ]]; then
		# With the TK1's old kernel the only way to run docker
		# is to use devicemapper storage driver with loopback
		# backend, which is unfit for production usage.
		# Every few months the loopback file gets corrupted and
		# docker can't start.
		# To solve this we go nuclear on docker.
		/usr/sbin/service docker stop || true
		rm -rf /var/lib/docker/
	    fi
	    /usr/sbin/service docker restart
	    test_docker &
	    if ! wait "$!"; then
		echo "ERROR: Cannot make docker work on the system"
		exit 1
	    fi
	fi
    fi

    if [ x"$keep_existing" != x"false" ]; then
	# We have docker-wrapper available, so use it to workaround dockerhub's
	# limits on pull requests.  This is important for benchmarking boards,
	# which call tcwg-update-bmk-containers.sh for every build.
	/root/docker-wrapper maybepull "$image"
    else
	# We are asked to update the container unconditionally.
	# Make sure we will use latest image.
	docker pull "$image"
    fi
else
    # No wrapper available: always pull directly.
    docker pull "$image"
fi

# Decide what to do with a container that already carries the requested
# name.  Afterwards rm_cnt is either empty or holds the name under which
# the old container was parked; it is read again after the new container
# has been started.
rm_cnt=""
if docker stats --no-stream "$container" >/dev/null 2>&1; then
    is_running=$(docker container inspect -f "{{.State.Running}}" "$container")
    if [ "$is_running" = "true" ]; then
	if [ "$keep_existing" = "true" ]; then
	    # A running container is good enough: nothing to do.
	    exit 0
	elif [ "$keep_existing" = "keep_if_same_image" ]; then
	    # Keep the running container only if it was created from the
	    # very image we are asked to start.
	    current_id=$(docker container inspect -f "{{.Image}}" "$container")
	    wanted_id=$(docker image inspect -f "{{.Id}}" "$image")
	    [ "$current_id" != "$wanted_id" ] || exit 0
	fi
    fi

    # Park the old container under another name so that "docker run" can
    # reuse the original one.  The new name is prefixed with today's date
    # (YYYY-MM-DD), i.e. it starts with a digit; presumably an external
    # cleanup job relies on that to remove leftovers if something goes
    # wrong here.
    rm_cnt="$(date +%Y-%m-%d)-$container.bak"
    docker rename "$container" "$rm_cnt" &
    rename_status=0
    wait $! || rename_status=$?
    if [ "$rename_status" -ne 0 ]; then
	# Renaming usually fails when the container is stuck in a restart
	# loop.  Such a container can't be the current one, so delete it
	# right away: graceful stop, plain removal, forced removal last.
	docker stop "$container" || true
	docker rm -v "$container" || docker rm -vf "$container"
	rm_cnt=""
    fi
fi

# Fetch the image's start.sh and run it to start the new container.
#
# Extra "docker run" options are collected in an array so that every
# element stays a single word without relying on word splitting.
qemu_mount=()
qemu_bin=$(mktemp -p "$HOME")
case "$(uname -m):$image" in
    x86_64:*-arm64-tcwg-llvmbot-*)
	# See dockerfiles.git/tcwg-base/tcwg-llvmbot/start.sh for details
	# on how this works.
	cp "$(command -v qemu-aarch64-static)" "$qemu_bin"
	chmod +x "$qemu_bin"
	qemu_mount=(-v "$qemu_bin:/bin/qemu-aarch64-static")
	;;
esac

start_sh=$(mktemp)
start_res=0
# The ${arr[@]+...} form expands to nothing for an empty array; a plain
# "${arr[@]}" is an "unbound variable" error under "set -u" in older bash.
if docker run --rm ${qemu_mount[@]+"${qemu_mount[@]}"} "$image" start.sh > "$start_sh"; then
    bash "$start_sh" --verbose "$verbose" --additional_options "$additional_options" -- "$@" || start_res=$?
else
    start_res=$?
fi

# Remove the temporary files whether or not the two commands above
# succeeded, then report the failure with its original exit status.
rm -f "$start_sh" "$qemu_bin"
if [ "$start_res" -ne 0 ]; then
    exit "$start_res"
fi

# The replacement container is up by now, so dispose of the old one that
# was parked under the name in $rm_cnt (empty means there is none).
# If both containers need the same exclusive resource (e.g., a tcp port
# or the connection to jenkins), the new container might have to restart
# a couple of times until the old one is gone.
if [ -n "$rm_cnt" ]; then
    # Ask for a graceful shutdown first; a failing stop is not fatal.
    docker stop "$rm_cnt" || true
    # Plain removal first, forced removal (SIGKILL) only as a fallback.
    docker rm -v "$rm_cnt" || docker rm -fv "$rm_cnt"
fi