summary | refs | log | tree | commit | diff
path: root/tcwg-start-container.sh
diff options
context:
space:
mode:
Diffstat (limited to 'tcwg-start-container.sh')
-rwxr-xr-x tcwg-start-container.sh 91
1 files changed, 83 insertions, 8 deletions
diff --git a/tcwg-start-container.sh b/tcwg-start-container.sh
index e33a4bb7..d1e35c6d 100755
--- a/tcwg-start-container.sh
+++ b/tcwg-start-container.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-set -ef -o pipefail
+set -euf -o pipefail
# shellcheck source=jenkins-helpers.sh
. "$(dirname $0)"/jenkins-helpers.sh
@@ -10,14 +10,71 @@ shift "$SHIFT_CONVERTED_ARGS"
obligatory_variables container image
declare container image
-keep_existing="${keep_existing-true}"
+dryrun="${dryrun-false}"
+keep_existing="${keep_existing-keep_if_same_image}"
verbose="${verbose-true}"
-
-set -u
+additional_options="${additional_options-}"
+test_docker="${test_docker-false}"
if $verbose; then set -x; fi
-docker pull "$image"
+# Check that docker can start a container.
+test_docker()
+{
+ timeout 30s /root/docker-wrapper ps
+ /root/docker-wrapper maybepull "$image"
+ /root/docker-wrapper run --rm --entrypoint=/bin/sh "$image"
+ echo "NOTE: Docker seems to be OK"
+}
+
+if [ -f /root/docker-wrapper ]; then
+ # /root/docker-wrapper is created by dockerfiles/tcwg-base/tcwg-host/run.sh;
+ # on benchmarking boards /root is bind-mounted inside "host" container.
+ if $test_docker; then
+ # The fact that we are here implies that we are running as root on
+ # a bare machine.
+ test_docker &
+ if ! wait $!; then
+ storage_driver=$(timeout 30s /root/docker-wrapper info \
+ | grep "Storage Driver" | awk '{print $3}' \
+ || true)
+ if [ x"$storage_driver" = x"" ] \
+ || [ x"$storage_driver" = x"devicemapper" ]; then
+ # With the TK1's old kernel the only way to run docker
+ # is to use devicemapper storage driver with loopback
+ # backend, which is unfit for production usage.
+ # Every few months the loopback file gets corrupted and
+ # docker can't start.
+ # To solve this we go nuclear on docker.
+ timeout 30s /usr/sbin/service docker stop || true
+ rm -rf /var/lib/docker/
+ # If the command below hangs, then we'll just wait for the eventual
+ # power-cycle. If docker still doesn't work from a clean
+ # state, then we need to investigate manually.
+ /usr/sbin/service docker stop || true
+ fi
+ /usr/sbin/service docker restart
+ test_docker &
+ if ! wait $!; then
+ echo "ERROR: Cannot make docker work on the system"
+ exit 1
+ fi
+ fi
+ fi
+
+ if [ x"$keep_existing" != x"false" ]; then
+ # We have docker-wrapper available, so use it to work around dockerhub's
+ # limits on pull requests. This is important for benchmarking boards,
+ # which call tcwg-update-bmk-containers.sh for every build.
+ /root/docker-wrapper maybepull "$image"
+ else
+ # We are asked to update the container unconditionally.
+ # Make sure we will use the latest image.
+ docker pull "$image"
+ fi
+else
+ docker pull "$image"
+fi
rm_cnt=""
if docker stats --no-stream "$container" >/dev/null 2>&1; then
@@ -32,6 +89,11 @@ if docker stats --no-stream "$container" >/dev/null 2>&1; then
fi
;;
esac
+
+ if $dryrun; then
+ exit $EXTERNAL_FAIL
+ fi
+
# Rename the current container to free up the name for "docker run" below.
# Use a new name starting with a number (seconds since epoch) so that
# it'll be cleaned up even if something goes wrong here.
@@ -42,11 +104,18 @@ if docker stats --no-stream "$container" >/dev/null 2>&1; then
# Failure to rename a container is usually caused by container
# restarting loop. This restarting container can't be the current
# one, so just delete it.
- docker rm -vf "$container"
+ docker stop "$container" || true
+ if ! docker rm -v "$container"; then
+ docker rm -vf "$container"
+ fi
rm_cnt=""
fi
fi
+if $dryrun; then
+ exit $EXTERNAL_FAIL
+fi
+
qemu_mount=""
qemu_bin=$(mktemp -p $HOME)
case "$(uname -m):$image" in
@@ -62,7 +131,7 @@ esac
start_sh=$(mktemp)
docker run --rm $qemu_mount $image start.sh > "$start_sh"
-bash "$start_sh" "$@"
+bash "$start_sh" --verbose "$verbose" --additional_options "$additional_options" -- "$@"
rm "$start_sh" "$qemu_bin"
if [ x"$rm_cnt" != x"" ]; then
@@ -70,5 +139,11 @@ if [ x"$rm_cnt" != x"" ]; then
# Note that if both old and new containers need an exclusive resource
# (e.g., tcp port or connection to jenkins), then the new container might
# need to restart a couple of times to wait for removal of the old one.
- docker rm -vf "$rm_cnt"
+ #
+ # We first try to gracefully shut down the container
+ docker stop "$rm_cnt" || true
+ if ! docker rm -v "$rm_cnt"; then
+ # ... and force SIGKILL only when necessary.
+ docker rm -fv "$rm_cnt"
+ fi
fi