<![CDATA[#!/bin/bash

# Clean: shellcheck -e 2001 ./tcwg-benchmark.sh

set -eu

scripts=$(dirname "$0")
# shellcheck source=jenkins-helpers.sh
. $scripts/jenkins-helpers.sh

convert_args_to_variables "$@"

obligatory_variables \
    boardname \
    image_arch \
    toolchain_url \
    bench_list \
    cflags \
    ldflags \
    extension \
    testmode \
    iterations \
    run_profile \
    sysroot \
    forceinstall \
    builder \
    results_dest \
    WORKSPACE \
    reboot \
    ignore_errors \
    clean_older_than \
    hw_tag
declare -g \
    boardname \
    image_arch \
    toolchain_url \
    bench_list \
    cflags \
    ldflags \
    extension \
    testmode \
    iterations \
    run_profile \
    sysroot \
    forceinstall \
    builder \
    results_dest \
    WORKSPACE \
    reboot \
    ignore_errors \
    clean_older_than \
    hw_tag

# Make shellcheck happy and work around Jenkins not defining variables
# for empty arguments.
bench_container_tag="${bench_container_tag-default}"
toolchain_type="${toolchain_type-auto}"
prepare_board="${prepare_board-true}"
verbose="${verbose-true}"
support_fortran_opt=""

if $verbose; then
    set -x
fi

prepare_toolchain ()
{
    # If $toolchain_url is of ssh:// type, don't create a remote build
    # container, just use the ssh command as provided.
    build_container_host=
    build_container_port=
    case "$toolchain_url" in
        "ssh://"*)
            ccprefix="${toolchain_url##ssh://}"
            # Extract host:port: specification from ccprefix; we don't
            # need to care about :parallelize here, just pass it to run.sh
            # if present.
            build=${ccprefix%:*}
            build_container_host="$(echo $build | cut -d: -f 1)"
            case ${ccprefix} in
                *:*:*)
                    build_container_port="$(echo $build | cut -s -d: -f 2)"
                    ;;
            esac
            if [ "x$build_container_host" = "x" ]; then
                echo "ERROR: ssh:// toolchain_url lacks a host: $toolchain_url."
                exit 1
            fi
            ;;
        *)
            if [ x"$builder" = x"bmk_board" ]; then
                # shellcheck disable=SC2154
                build_container_host=$run_container_host
                # shellcheck disable=SC2154
                build_container_port=$run_container_port
            else
                build_container_tag="${builder#*:}"
                builder="${builder%:*}"

                if echo "$builder" | grep ".*-[0-9]\+" >/dev/null; then
                    # Builder is a specific node.
                    docker_host_opt="--arch amd64 --node $builder"
                else
                    docker_host_opt="--label $builder"
                fi

                $scripts/start-container-docker.sh $docker_host_opt --distro "$build_container_tag" --task build --prefix build_ > build-container.sh
                . ./build-container.sh
            fi
            ;;
    esac

    case "$toolchain_url" in
        "ssh://"*)
            if [ x"$sysroot" = x"tarball" ]; then
                echo "ERROR: Unsupported sysroot $sysroot for toolchain_url $toolchain_url"
                exit 1
            fi
            # Last component of ccprefix is the path, keep it.
            toolchaindir="$(dirname ${ccprefix##*:})"
            ;;
        "http://"*".tar.xz"|"https://"*".tar.xz")
            toolchaindir=$(untar_url "$toolchain_url" "$WORKSPACE" "--strip-components 1")
            ;;
        "rsync://"*)
            ccprefix="${toolchain_url##rsync://}"
            # Extract host:port: specification from ccprefix; we don't
            # need to care about :parallelize here, just pass it to run.sh
            # if present.
            rsync_spec=${ccprefix%:*}
            rsync_host="$(echo $rsync_spec | cut -d: -f 1)"
            case ${ccprefix} in
                *:*:*)
                    rsync_port="$(echo $rsync_spec | cut -s -d: -f 2)"
                    ;;
                *:*)
                    # If no port is specified, use 22 (the ssh default port).
                    rsync_port=22
                    ;;
            esac
            # We want to access the remote toolchain via a container, to
            # avoid problems with the host's ssh server restrictions on the
            # number of simultaneous connections.
            # We copy it to the build container (assuming it uses the same
            # architecture as the machine pointed to by $toolchain_url).
            # Assume ccprefix looks like /path/bin/target-triplet-, and
            # compute 'path'.]]>
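<![CDATA[            # For example (hypothetical values), toolchain_url
            # rsync://build-01:22:/opt/toolchain/bin/aarch64-linux-gnu- gives
            # ccprefix "build-01:22:/opt/toolchain/bin/aarch64-linux-gnu-";
            # ${ccprefix##*:} keeps only the path, and the two dirname calls
            # below yield /opt/toolchain, which is mirrored into
            # ${WORKSPACE}/toolchain.]]>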
<![CDATA[            src_toolchaindir=$(dirname "$(dirname ${ccprefix##*:})")
            toolchaindir="${WORKSPACE}/toolchain"
            rsync -az --delete -e "ssh -p$rsync_port" \
                "$rsync_host:$src_toolchaindir/" "$toolchaindir/"
            ;;
        *)
            echo "ERROR: Cannot handle toolchain_url: $toolchain_url"
            exit 1
            ;;
    esac

    # Sanity check that toolchain_type is supported.
    case "$toolchain_type" in
        gnu|llvm|auto) ;;
        *)
            echo "ERROR: Unsupported toolchain type: $toolchain_type"
            exit 1
            ;;
    esac

    # In the ssh:// case, we have to perform the 'find' operations
    # remotely.
    case "$toolchain_url" in
        "ssh://"*)
            maybe_remote="ssh ${build_container_port:+-p$build_container_port} $build_container_host"
            ;;
        *)
            maybe_remote=""
            ;;
    esac

    case "$toolchain_type" in
        "gnu") ;;
        "llvm")
            if [ x"$($maybe_remote find "$toolchaindir" -path "*bin/*flang-new" | wc -l)" != x"0" ]; then
                support_fortran_opt="--support_fortran"
            fi
            ;;
        "auto")
            if [ x"$($maybe_remote find "$toolchaindir" -path "*bin/*gcc" | wc -l)" != x"0" ]; then
                toolchain_type="gnu"
            elif [ x"$($maybe_remote find "$toolchaindir" -path "*bin/*clang" | wc -l)" != x"0" ]; then
                toolchain_type="llvm"
            else
                echo "ERROR: Cannot autodetect toolchain type"
                exit 1
            fi
            ;;
    esac

    # Non-ssh:// cases have to copy the just-copied toolchain to
    # the remote build container. For ssh://, we'll access the
    # toolchain remotely.
    case "$toolchain_url" in
        "ssh://"*) ;;
        *)
            case "$toolchain_type" in
                "gnu") ccname="gcc" ;;
                "llvm") ccname="clang" ;;
            esac
            ccpath=$($maybe_remote find "$toolchaindir" -path "*bin/*$ccname")
            if [ "$(echo "$ccpath" | wc -w)" -ne 1 ]; then
                echo "ERROR: found more than one compiler: $ccpath"
                exit 1
            fi
            ccprefix=$(echo "$ccpath" | sed -e "s/$ccname\$//")
            # Copy toolchain to the build container.
            ssh ${build_container_port:+-p$build_container_port} \
                $build_container_host mkdir -p "$toolchaindir"
            rsync -a --del -e "ssh ${build_container_port:+-p$build_container_port}" \
                "$toolchaindir/" "$build_container_host:$toolchaindir/"
            if [ x"$builder" != x"bmk_board" ]; then
                ccprefix="$build_container_host:$build_container_port:$ccprefix"
            fi
            ;;
    esac

    case "$sysroot" in
        "tarball")
            sysroot="$build_container_host:$build_container_port:$(find "$toolchaindir" -name "libc")"
            ;;
        "http://"*|"https://"*)
            sysrootdir=$(untar_url "$sysroot" "$WORKSPACE" "--strip-components 1")
            # Copy sysroot to the build container.
            ssh ${build_container_port:+-p$build_container_port} \
                $build_container_host mkdir -p "$sysrootdir"
            rsync -a --del -e "ssh ${build_container_port:+-p$build_container_port}" \
                "$sysrootdir/" "$build_container_host:$sysrootdir/"
            sysroot="$build_container_host:$build_container_port:$sysrootdir"
            ;;
        "ssh://"*)
            sysroot="${sysroot##ssh://}"
            # Check host:port specification from sysroot.
            case ${sysroot} in
                *:*) ;;
                *)
                    echo "ERROR: ssh:// sysroot lacks a host: $sysroot"
                    exit 1
                    ;;
            esac
            ;;
        "")
            # Use system sysroot.
            ;;
        *)
            echo "ERROR: Cannot handle sysroot: $sysroot"
            exit 1
            ;;
    esac
}

case "$hw_tag:$boardname:$image_arch" in
    apm_32:*-apm-*:armhf) ;;
    apm_64:*-apm-*:arm64) ;;
    sq_32:*-sq-*:armhf) ;;
    sq_64:*-sq-*:arm64) ;;
    stm32:dev-*:amd64) ;;
    tk1_32:*-tk1-*:armhf) ;;
    tx1_64:*-tx1-*:arm64) ;;
    tx1_32:*-tx1-*:armhf) ;;
    fx_32:*-fx-*:armhf) ;;
    fx_64:*-fx-*:arm64) ;;
    qc_32:*-qc-*:armhf) ;;
    qc_64:*-qc-*:arm64) ;;
    *)
        echo "ERROR: hw_tag parameter is not valid: $hw_tag"
        exit 1
        ;;
esac

# Check that we can ssh to the board and rsync scripts. This ensures that
# the board is online and the filesystem is in good condition. Try to reboot
# and/or power-cycle the board as needed.
case "$hw_tag:$reboot" in
    stm32:*)
        # 1. If the host machine isn't available on the 1st try -- give up.]]>
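<![CDATA[        # (stm32 runs use an amd64 dev-* host as $boardname -- see the
        # hw_tag check above -- so reboot, power-cycle, and board
        # preparation are skipped.)]]>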
<![CDATA[        tries_left=1
        reboot=false
        prepare_board=false
        ;;
    *:true)
        # 1. Try access after soft reboot
        # 2. Try access after power-cycle
        tries_left=2
        ;;
    *)
        # 1. Try access without rebooting
        # 2. Try access after soft reboot
        # 3. Try access after power-cycle
        tries_left=3
        ;;
esac

force_power_cycle=false
while [ $tries_left != 0 ]; do
    tries_left=$(($tries_left-1))

    if timeout 1m ssh "$boardname" true; then
        ssh_cmd="ssh"
        wait_opts=()
    elif timeout 1m ssh -p22 -lroot "$boardname" true; then
        ssh_cmd="ssh -p22 -lroot"
        wait_opts=(-p22 -lroot)
    else
        ssh_cmd="false"
        wait_opts=(-p22 -lroot)
        reboot=true
        force_power_cycle=true
        tries_left=0
    fi

    if $prepare_board; then
        if ! $reboot; then
            # Check board for kernel panics and reboot, if any.
            dmesg_file="$boardname.dmesg-$(date +%s)"
            timeout 1m $ssh_cmd "$boardname" dmesg -l emerg 2>&1 \
                | tee "$dmesg_file-emerg"
            if [ x"$(cat "$dmesg_file-emerg" | wc -l)" != x"0" ]; then
                reboot=true
                timeout 1m $ssh_cmd "$boardname" dmesg 2>&1 \
                    | tee "$dmesg_file"
            else
                # Remove empty dmesg reports, but keep non-empty ones for
                # offline analysis -- e.g., to understand frequency and
                # nature of kernel panics.
                rm "$dmesg_file-emerg"
            fi
        fi

        if $force_power_cycle; then
            echo "Trying to power-cycle $boardname"
            (
                pdu_name=$(echo "${boardname%.tcwglab}" \
                    | sed -e 's/^tcwg-bmk-/tcwg-/')
                nvidia-power-cycle.sh "$pdu_name"
                wait_for_ssh_server "$boardname" 150 "${wait_opts[@]}"
            ) &
            wait $! || exit $EXTERNAL_FAIL
            echo "Successfully power-cycled $boardname"
        elif $reboot; then
            echo "Trying to reboot $boardname"
            # Reboot the board.
            # Ping the board every second (ServerAliveInterval=1) to avoid
            # waiting [default] 5min for ssh to break the connection.
            $ssh_cmd -Snone -oServerAliveInterval=1 $boardname \
                sudo reboot || true
            # Wait until the ssh server is ready.
            sleep 30 # Give the board time to shut down.
            wait_for_ssh_server "$boardname" 150 "${wait_opts[@]}" &
            ret=0 && wait $! || ret=$?
            if [ $ret != 0 ]; then
                echo "SSH server did not respond after reboot"
            fi
        fi
    fi

    (
        if $prepare_board; then
            $scripts/tcwg-update-bmk-containers.sh --board "$boardname" \
                --test_docker true
        fi
        rsync -az --del bmk-scripts/ "$boardname:bmk-scripts/"
    ) &
    res=0 && wait $! || res=$?
    if [ $res = 0 ]; then
        break
    else
        reboot=true
        if [ x"$tries_left" = x"1" ]; then
            force_power_cycle=true
        fi
    fi
done

if [ $res != 0 ]; then
    echo "ERROR: Could not get board online"
    exit $EXTERNAL_FAIL
fi

if $prepare_board; then
    # FIXME: Implement more configurations and checks:
    # - disable swap
    # - set interrupt affinity
    # - check that there are no stray processes
    # - test that taskset works
    remote_exec "$boardname:::-t -Snone" \
        sudo bmk-scripts/prepare-board.sh --hw_tag "$hw_tag" \
        --action start_board --verbose &
    res=0 && wait $! || res=$?
    if [ $res != 0 ]; then
        echo "ERROR: Could not prepare board for benchmarking"
        exit $EXTERNAL_FAIL
    fi
fi

# Make sure to clean up the build container if something goes
# wrong when preparing the test environment.
trap "cleanup_all_containers" EXIT

# Start a container to run the benchmarks in.
# We install SPEC in /home/tcwg-benchmark, so bind-mount it as $WORKSPACE.
WORKSPACE=$HOME $scripts/start-container-docker.sh --session-host "$boardname" \
    --arch "$image_arch" --distro "$bench_container_tag" --task bench \
    --security "--privileged" --prefix run_ > run-container.sh &
res=0 && wait $! || res=$?
if [ $res != 0 ]; then
    echo "ERROR: Could not start benchmarking container"
    exit $EXTERNAL_FAIL
fi
trap "cleanup_all_containers" EXIT]]>
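<![CDATA[# run-container.sh was generated above by start-container-docker.sh with
# "--prefix run_"; sourcing it is expected to define run_container_host and
# run_container_port, which prepare_toolchain and remote_exec use to reach
# the benchmark container.]]>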
<![CDATA[. ./run-container.sh
declare -g run_container_host run_container_port

prepare_toolchain

case "$bench_list" in
    coremark)
        remote_exec "$run_container_host:$run_container_port:bmk-scripts:-t -Snone" \
            ./coremark.sh \
            --ccprefix "$ccprefix" \
            --cflags "$cflags" \
            --ldflags "$ldflags" \
            --forceinstall "true" \
            --resultsdest "${results_dest}/$boardname" \
            --verbose true
        ;;
    *)
        # Any other keyword corresponds to spec2xxx (either 2006 or 2017).
        case "$testmode" in
            build) input_size="test" ;;
            verify) input_size="train" ;;
            benchmark) input_size="ref" ;;
        esac

        # spec_config follows run_profile.
        case "$run_profile" in
            serial) config="serial" ;;
            parallel|parallel_*) config="parallel" ;;
        esac

        remote_exec "$run_container_host:$run_container_port::-t -Snone" \
            bmk-scripts/run.sh \
            --bench "$bench_list" \
            --config "$config" \
            --cflags "$cflags" \
            --ldflags "$ldflags" \
            --ccprefix "$ccprefix" \
            --extension "$extension" \
            --hw_tag "$hw_tag" \
            --ignore_errors "$ignore_errors" \
            --input_size "$input_size" \
            --iterations "$iterations" \
            --run_profile "$run_profile" \
            ${sysroot:+--sysroot "$sysroot"} \
            --toolchain "$toolchain_type" \
            $support_fortran_opt \
            --resultsdest "${results_dest}/$boardname" \
            --nodename "$boardname" \
            --forceinstall "${forceinstall}" \
            ${clean_older_than:+--clean_older_than "$clean_older_than"} \
            --verbose true
        ;;
esac

if $prepare_board; then
    remote_exec "$boardname:::-t -Snone" \
        sudo bmk-scripts/prepare-board.sh --action stop_board --verbose &
    res=0 && wait $! || res=$?
    if [ $res != 0 ]; then
        echo "Warning: prepare-board.sh did not finish cleanly"
    fi
fi]]>
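<![CDATA[# Note: the build and benchmark containers started above are cleaned up by
# the "cleanup_all_containers" EXIT trap installed earlier.]]>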