diff options
Diffstat (limited to 'tcwg-start-container.sh')
-rwxr-xr-x | tcwg-start-container.sh | 91 |
1 file changed, 83 insertions, 8 deletions
diff --git a/tcwg-start-container.sh b/tcwg-start-container.sh index e33a4bb7..d1e35c6d 100755 --- a/tcwg-start-container.sh +++ b/tcwg-start-container.sh @@ -1,6 +1,6 @@ #!/bin/bash -set -ef -o pipefail +set -euf -o pipefail # shellcheck source=jenkins-helpers.sh . "$(dirname $0)"/jenkins-helpers.sh @@ -10,14 +10,71 @@ shift "$SHIFT_CONVERTED_ARGS" obligatory_variables container image declare container image -keep_existing="${keep_existing-true}" +dryrun="${dryrun-false}" +keep_existing="${keep_existing-keep_if_same_image}" verbose="${verbose-true}" - -set -u +additional_options="${additional_options-}" +test_docker="${test_docker-false}" if $verbose; then set -x; fi -docker pull "$image" +# Check that docker can start a container. +test_docker() +{ + timeout 30s /root/docker-wrapper ps + /root/docker-wrapper maybepull "$image" + /root/docker-wrapper run --rm --entrypoint=/bin/sh "$image" + echo "NOTE: Docker seems to be OK" +} + +if [ -f /root/docker-wrapper ]; then + # /root/docker-wrapper is created by dockerfiles/tcwg-base/tcwg-host/run.sh; + # on benchmarking boards /root is bind-mounted inside "host" container. + if $test_docker; then + # The fact that we are here implies that we are running as root on + # a bare machine. + test_docker & + if ! wait $!; then + storage_driver=$(timeout 30s /root/docker-wrapper info \ + | grep "Storage Driver" | awk '{print $3}' \ + || true) + if [ x"$storage_driver" = x"" ] \ + || [ x"$storage_driver" = x"devicemapper" ]; then + # With the TK1's old kernel the only way to run docker + # is to use devicemapper storage driver with loopback + # backend, which is unfit for production usage. + # Every few months the loopback file gets corrupted and + # docker can't start. + # To solve this we go nuclear on docker. + timeout 30s /usr/sbin/service docker stop || true + rm -rf /var/lib/docker/ + # If below hangs, then we'll just wait for the eventual + # power-cycle. 
If docker still doesn't work from a clean + # state, then we need to investigate manually. + /usr/sbin/service docker stop || true + fi + /usr/sbin/service docker restart + test_docker & + if ! wait $!; then + echo "ERROR: Cannot make docker work on the system" + exit 1 + fi + fi + fi + + if [ x"$keep_existing" != x"false" ]; then + # We have docker-wrapper available, so use it to workaround dockerhub's + # limits on pull requests. This is important for benchmarking boards, + # which call tcwg-update-bmk-containers.sh for every build. + /root/docker-wrapper maybepull "$image" + else + # We are asked to update the container unconditionally. + # Make sure we will use latest image. + docker pull "$image" + fi +else + docker pull "$image" +fi rm_cnt="" if docker stats --no-stream "$container" >/dev/null 2>&1; then @@ -32,6 +89,11 @@ if docker stats --no-stream "$container" >/dev/null 2>&1; then fi ;; esac + + if $dryrun; then + exit $EXTERNAL_FAIL + fi + # Rename the current container to free-up the name for "docker run" below. # Use rename name starting with a number (seconds since epoch) so that # it'll be cleaned up even if something goes wrong here. @@ -42,11 +104,18 @@ if docker stats --no-stream "$container" >/dev/null 2>&1; then # Failure to rename a container is usually caused by container # restarting loop. This restarting container can't be the current # one, so just delete it. - docker rm -vf "$container" + docker stop "$container" || true + if ! 
docker rm -v "$container"; then + docker rm -vf "$container" + fi rm_cnt="" fi fi +if $dryrun; then + exit $EXTERNAL_FAIL +fi + qemu_mount="" qemu_bin=$(mktemp -p $HOME) case "$(uname -m):$image" in @@ -62,7 +131,7 @@ esac start_sh=$(mktemp) docker run --rm $qemu_mount $image start.sh > "$start_sh" -bash "$start_sh" "$@" +bash "$start_sh" --verbose "$verbose" --additional_options "$additional_options" -- "$@" rm "$start_sh" "$qemu_bin" if [ x"$rm_cnt" != x"" ]; then @@ -70,5 +139,11 @@ if [ x"$rm_cnt" != x"" ]; then # Note that if both old and new containers need an exclusive resource # (e.g., tcp port or connection to jenkins), then the new container might # need to restart a couple of times to wait for removal of the old one. - docker rm -vf "$rm_cnt" + # + # We first try to gracefully shutdown the container + docker stop "$rm_cnt" || true + if ! docker rm -v "$rm_cnt"; then + # ... and force SIGKILL only when necessary. + docker rm -fv "$rm_cnt" + fi fi |