#!/bin/bash

set -e -o pipefail

# shellcheck source=jenkins-helpers.sh
. "$(dirname $0)"/jenkins-helpers.sh

# Start a local docker instance with the requested arch and distro.
#
# This script is meant to be executed from Jenkins jobs inside the TCWG
# lab. It prints shell commands meant to be executed in the parent
# shell, consisting in:
# - definition of ${CONTAINER}, used to prefix commands that you want
#   to run inside the container.
# - definition of ${CONTAINER_CLEANUP}, a cleanup statement to remove
#   the container, on exit for instance.
# - definition of ${session_host} and ${session_port}, which can be used
#   for a remote connection to the container.

usage() {
    echo "Usage: $0 [--arch container-arch] --distro flavour [--docker_opts opts] [--dryrun true/false] [--label label] [--newuser username:[uid]] [--node node] [--prefix prefix] [--session-host host] [--session-name name] [--ssh_info true/false] [--task {build|test|bench}] [--user user] [--weight weight] [--verbose true/false]"
    echo
    echo "  container-arch: architecture (eg: amd64, i386, arm64, armhf)"
    echo "  distro: distribution (eg: bionic)"
    echo "  dryrun: boolean, just print commands if true"
    echo "  label: jenkins label; container is started on least-busy node; also sets container architecture"
    echo "  newuser: new user to create inside container, [:] specification."
    echo "  node: jenkins node; container is started on host mapped to the node"
    echo "  prefix: prefix to prepend to output variables and functions"
    echo "  session-host: hostname where the container will run, defaults to localhost"
    echo "                useful if the name resolution does not work correctly"
    echo "  session-name: session, in case the default '\$BUILD_NUMBER-\$JOB_NAME' is not suitable"
    echo "  ssh_info: set \$ssh_host and \$ssh_port env variables in the container"
    echo "  task: type of container (build, test or bench, default=build)"
    echo "  user: remote user to use in the container."
    echo "  weight: container weight, reserves resources. Default=1"
    echo "  verbose: whether to enable verbose output. Default=false"
    exit 1
}
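# A minimal usage sketch (not part of this script): the caller is expected to
# eval this script's stdout and then use the printed definitions.  The script
# filename and the do-build.sh command below are hypothetical examples; the
# exact variable names depend on --prefix.
#
#   eval "$(./start-container.sh --arch amd64 --distro bionic --task build)"
#   # ${CONTAINER_CLEANUP} removes the container, e.g. from an EXIT trap:
#   trap "${CONTAINER_CLEANUP}" EXIT
#   # ${CONTAINER} prefixes commands so they run inside the container:
#   ${CONTAINER} ./do-build.sh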
ssh_error() {
    echo "ERROR: ssh returned with code: $1, date: $(date)."
    echo "Trying another ssh connection to $session_host to get debug logs:"
    ssh -v $session_host true
    return $1
}

# Save stdout/stderr file descriptors
exec 3>&1 4>&2
# Make sure all output goes to stderr
exec 1>&2

container_arch="default"
distro="default"
docker_opts=
dryrun=false
label=
node=
newuser=
prefix=
session_host=
session_name=
ssh_info=false
task="build"
weight=1
user=
verbose="false"

while [ $# -ge 1 ]
do
    case $1 in
        --arch)
            container_arch=$2
            [ x${container_arch} = x ] && usage
            shift 2
            ;;
        --distro)
            distro=$2
            [ x${distro} = x ] && usage
            shift 2
            ;;
        --docker_opts)
            docker_opts="$2"
            [ x"${docker_opts}" = x ] && usage
            shift 2
            ;;
        --dryrun)
            dryrun=$2
            [ x${dryrun} = x ] && usage
            [ $dryrun != false ] && [ $dryrun != true ] && usage
            shift 2
            ;;
        --label)
            label=$2
            [ x${label} = x ] && usage
            shift 2
            ;;
        --node)
            node=$2
            [ x${node} = x ] && usage
            shift 2
            ;;
        --newuser)
            newuser="$2"
            [ x${newuser} = x ] && usage
            shift 2
            ;;
        --prefix)
            prefix=$2
            [ x${prefix} = x ] && usage
            shift 2
            ;;
        --session-host)
            session_host=$2
            [ x${session_host} = x ] && usage
            shift 2
            ;;
        --session-name)
            session_name=$2
            [ x${session_name} = x ] && usage
            shift 2
            ;;
        --ssh_info)
            ssh_info=$2
            [ x$ssh_info = x ] && usage
            shift 2
            ;;
        --task)
            task=$2
            case "${task}" in
                build|bench|test) ;;
                *) usage ;;
            esac
            shift 2
            ;;
        --user)
            user="$2"@
            [ x${user} = x@ ] && usage
            shift 2
            ;;
        --weight)
            weight=$2
            [ x${weight} = x ] && usage
            shift 2
            ;;
        --verbose)
            verbose=$2
            [ x${verbose} = x ] && usage
            shift 2
            ;;
        *)
            echo "Unsupported option: $1"
            usage
            ;;
    esac
done

if $verbose; then
    set -x
fi

dryruncmd=""
if $dryrun; then
    dryruncmd="echo"
fi

if [ x"$node" = x"" ] && [ x"$session_host" = x"" ] && [ x"$label" != x"" ]; then
    node=$(print_node_with_least_containers "$label")
    if [ x"$node" = x"" ]; then
        echo "ERROR: Cannot find node for $label"
        exit 1
    fi
fi

if [ x"$node" != x"" ]; then
    if [ x"$session_host" != x"" ]; then
        echo "--session-host conflicts with --node"
        usage
    fi
    session_host=$(print_host_for_node $node)
fi

if [ x"$session_host" = x"" ]; then
    # Get first FQDN. This name needs to have .tcwglab suffix for VPN'ed
    # machines and entries in .ssh/config for external machines.
    session_host=$(hostname -A | cut -d" " -f 1)
    arch_host="localhost"
else
    arch_host="$session_host"
fi

if [ x"${container_arch}" = x"default" ]; then
    if [ x"$label" != x"" ]; then
        container_arch=$(print_arch_for_label "$label")
    else
        container_arch=$(print_arch_for_host "$arch_host")
    fi
elif [ x"$label" != x"" ]; then
    echo "--arch conflicts with --label"
    usage
fi

if [ x"$session_name" = x ]; then
    # Set the default session_name, using BUILD_NUMBER and JOB_NAME,
    # as set by Jenkins.
    # shellcheck disable=SC2153
    if [ "x$BUILD_NUMBER" != "x" ] && [ "x$JOB_NAME" != "x" ]; then
        session_name="$BUILD_NUMBER-$JOB_NAME"
    else
        session_name="$USER-$(date +%Y%m%d-%H_%M_%S)"
    fi
    session_name=$(print_docker_name "$session_name")
fi

# Resolve LTS and LTS-1 values to Ubuntu distros.
case "$distro:$container_arch" in
    lts_1:*|default:*)
        distro=bionic
        ;;
    lts:armhf)
        # There's still no arm32v7/ubuntu:focal docker image, so
        # force using bionic for armhf for now.
        distro=bionic
        ;;
    lts:*)
        distro=focal
        ;;
esac

image=linaro/ci-${container_arch}-tcwg-build-ubuntu:${distro}

# Avoid connection sharing because of race conditions with parallel
# builds.
SSH="ssh -S none"

# Note that when we use this we *want* it to split on spaces,
# so that the shell runs:
#   foo bar docker <...>
# instead of:
#   "foo bar docker" <...>
DOCKER="$dryruncmd $SSH $session_host docker-wrapper"

$DOCKER maybepull $image || ssh_error $?
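# As a sketch of what runs from here on: in a normal (non-dryrun) run $DOCKER
# expands to something like "ssh -S none <session_host> docker-wrapper"
# (hostname hypothetical), so every $DOCKER command below executes docker on
# the session host.  For example, with --arch arm64 and --distro bionic the
# image handled above would be linaro/ci-arm64-tcwg-build-ubuntu:bionic.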
SECURITY="--cap-add=SYS_PTRACE"
# We need this because of a bug in libgo's configure script:
# it would crash when testing "whether setcontext clobbers TLS
# variables", and report neither "no" nor "yes", later making
# configure fail.
# Also, because the sanitizers need to disable ASLR during the tests
# and docker needs to explicitly enable the process to do that on all
# architectures.
SECURITY="${SECURITY} --security-opt seccomp:unconfined"

# Reserve resources according to weight and task
nproc=$($SSH $session_host nproc --all)
pids=$(print_pids_limit "$task" "$weight")
cpus=$(print_cpu_shares "$task" "$weight")
memory=$(print_memory_limit "$task" "$weight")
memory_opt="--memory=${memory}M"
if [ x"$memory" = x"unlimited" ]; then
    memory_opt=""
fi

if [ x"${JOB_NAME:+set}" = x"set" ]; then
    job_name="$JOB_NAME"
fi

IFS=" " read -r -a bind_mounts <<< "$(print_bind_mounts "$task" "$SSH $session_host")"

bind_mounted_workspace=false
bind_mounts_opt=()
for bind_mount in "${bind_mounts[@]}"; do
    dir="${bind_mount%%:*}"
    if [ x"$dir" = x"$WORKSPACE" ]; then
        bind_mounted_workspace=true
    fi
    # Make sure all bind-mount /home/* directories exist.
    # If a host bind-mount dir doesn't exist, then docker creates it on
    # the host with root:root owner, which can't be removed by the cleanup job.
    case "$dir" in
        "/home/"*)
            $dryruncmd $SSH $session_host mkdir -p "$dir"
            ;;
    esac
    bind_mounts_opt=("${bind_mounts_opt[@]}" "-v" "$dir:$bind_mount")
done

IFS=" " read -r -a volume_mounts <<< "$(print_volume_mounts "$job_name" "-$container_arch-$distro")"
for mount in "${volume_mounts[@]}"; do
    bind_mounts_opt=("${bind_mounts_opt[@]}" "-v" "$mount")
done

# Give access to all CPUs to the container.
# This happens by default on most machines, but on machines that can put
# unused cores offline (TK1s and TX1s) it can happen that docker's cpuset
# cgroup gets configured to use only a subset of cores. If this happens,
# then we have a bug in our HW scripts, and here is the best place
# to detect this problem -- docker run will fail if it can't provide
# any of the CPUs to the new container.
cpuset_opt="--cpuset-cpus 0-$(($nproc - 1))"

echo "DEBUG: starting docker on $session_host from $(hostname), date $(date)"

# shellcheck disable=SC2206
docker_run=($DOCKER run --name $session_name -dtP \
    "${bind_mounts_opt[@]}" \
    ${SECURITY} \
    ${memory_opt} \
    "--pids-limit=${pids}" \
    "--cpu-shares=${cpus}" \
    $cpuset_opt \
    ${docker_opts} \
    $image) || ssh_error $?
echo "${docker_run[@]}"

# FIXME: It seems in some cases $DOCKER run generates a session_id but
# returns an error code. Try to get more information.
ret=0
session_id=$("${docker_run[@]}") || ret=$?
if [ $ret -ne 0 ]; then
    if [ $ret -eq 255 ]; then
        echo "WARNING: $SSH $session_host returned an error ($ret). Trying another ssh connection to get debug logs"
        $SSH -v $session_host true
    else
        echo "WARNING: docker run returned an error: $ret, trying to continue nonetheless..."
    fi
fi

if [ x"$session_id" = x ]; then
    echo "ERROR: could not create session_id"
    exit 1
fi

# Remove the docker instance we have just created in case something
# goes wrong.
CONTAINER_CLEANUP="$DOCKER rm -fv ${session_id}"
# shellcheck disable=SC2064
trap "exec 1>&3 2>&4 ; ${CONTAINER_CLEANUP}" EXIT

if [ x"$newuser" != x"" ]; then
    $DOCKER exec "$session_id" new-user.sh --user $newuser
fi

session_port=$($DOCKER port $session_id 22 | cut -d: -f 2) || ssh_error $?
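# The container is now running and its ssh endpoint is known.  As a sketch
# with hypothetical values: if "docker port ... 22" reported host port 32768,
# a caller could reach the container's ssh server with
#   ssh -p 32768 ${user}${session_host}
# The wait below checks that this server actually answers before continuing.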
# Wait until the ssh server is ready to serve connections.
# Make sure connection messages go to stderr, so that in case of
# success stdout contains only the connection info expected by the
# caller.
ret=0
$dryruncmd wait_for_ssh_server ${user}$session_host $session_port || ret=$?
if [ $ret != 0 ]; then
    echo "SSH server did not respond, exiting"
    exit $ret
fi

# For CI builds make sure to kill the previous build, which might have been
# aborted by jenkins but whose processes could have survived. Otherwise the
# old build can start writing to files of the current build.
if $bind_mounted_workspace; then
    prev_container=$($SSH $session_host flock "$WORKSPACE/.lock" cat "$WORKSPACE/.lock" || true)
    # Container may have been cleaned up by something else
    if [ x"$prev_container" != x"" ] && [ "$(docker ps -a | grep $prev_container)" ]; then
        echo "NOTE: Removing previous container for $WORKSPACE"
        $DOCKER rm -vf "$prev_container" || echo "WARNING: Could not remove $prev_container"
    fi
    $SSH $session_host bash -c "\"mkdir -p $WORKSPACE && flock $WORKSPACE/.lock echo $session_name > $WORKSPACE/.lock\""
    CONTAINER_CLEANUP="${CONTAINER_CLEANUP}; $SSH $session_host flock $WORKSPACE/.lock rm $WORKSPACE/.lock"
fi

# Do not remove the container upon exit: it is now ready.
trap EXIT

ssh_info_opt=""
if $ssh_info; then
    ssh_info_opt="ssh_host=${user}$session_host ssh_port=$session_port"
fi

# Restore stdout/stderr
exec 1>&3 2>&4

cat <