diff options
Diffstat (limited to 'tcwg-benchmark.sh')
-rwxr-xr-x | tcwg-benchmark.sh | 363 |
1 files changed, 233 insertions, 130 deletions
diff --git a/tcwg-benchmark.sh b/tcwg-benchmark.sh index 37859848..21e5fa9e 100755 --- a/tcwg-benchmark.sh +++ b/tcwg-benchmark.sh @@ -2,7 +2,7 @@ # Clean: shellcheck -e 2001 ./tcwg-benchmark.sh -set -eux +set -eu scripts=$(dirname "$0") # shellcheck source=jenkins-helpers.sh @@ -23,12 +23,12 @@ obligatory_variables \ sysroot \ forceinstall \ builder \ - results_id \ - BUILD_NUMBER \ + results_dest \ WORKSPACE \ reboot \ ignore_errors \ - clean_older_than + clean_older_than \ + hw_tag declare -g \ boardname \ image_arch \ @@ -43,29 +43,27 @@ declare -g \ sysroot \ forceinstall \ builder \ - results_id \ - BUILD_NUMBER \ + results_dest \ WORKSPACE \ reboot \ ignore_errors \ - clean_older_than + clean_older_than \ + hw_tag # Make shellcheck happy and workaround Jenkins not defining variables # for empty arguments. -bench_container_tag="${bench_container_tag-bionic}" -build_container_tag="${build_container_tag-bionic}" +bench_container_tag="${bench_container_tag-default}" toolchain_type="${toolchain_type-auto}" prepare_board="${prepare_board-true}" +verbose="${verbose-true}" +support_fortran_opt="" -if echo "$builder" | grep -q ".*-[0-9]\+"; then - docker_host_opt="--arch amd64 --node $builder" -else - docker_host_opt="--label $builder" +if $verbose; then + set -x fi -# shellcheck source=jenkins-helpers.sh -. $scripts/jenkins-helpers.sh - +prepare_toolchain () +{ # If $toolchain_url is of ssh:// type, don't create a remote build # container, just use the ssh command as provided. build_container_host= @@ -83,27 +81,32 @@ case "$toolchain_url" in *:*:*) build_container_port="$(echo $build | cut -s -d: -f 2)" ;; - *:*) - # If no port is specified, use 22 (ssh default port) - build_container_port=22 - ;; esac if [ "x$build_container_host" = "x" ]; then echo "ERROR: ssh:// toolchain_url lacks a host: $toolchain_url." exit 1 fi - if [ "x$build_container_port" = "x" ]; then - echo "ERROR: ssh:// toolchain_url lacks a port: $toolchain_url." - exit 1 - fi ;; *) - # Make sure to cleanup build container if something goes - # wrong when preparing the test environment - trap "cleanup_all_containers" EXIT - $scripts/start-container-docker.sh $docker_host_opt --distro "$build_container_tag" --task build --prefix build_ > build-container.sh - . ./build-container.sh + if [ x"$builder" = x"bmk_board" ]; then + # shellcheck disable=SC2154 + build_container_host=$run_container_host + # shellcheck disable=SC2154 + build_container_port=$run_container_port + else + build_container_tag="${builder#*:}" + builder="${builder%:*}" + if echo "$builder" | grep ".*-[0-9]\+" >/dev/null; then + # Builder is a specific node + docker_host_opt="--arch amd64 --node $builder" + else + docker_host_opt="--label $builder" + fi + + $scripts/start-container-docker.sh $docker_host_opt --distro "$build_container_tag" --task build --prefix build_ > build-container.sh + . ./build-container.sh + fi ;; esac @@ -121,6 +124,21 @@ case "$toolchain_url" in ;; "rsync://"*) ccprefix="${toolchain_url##rsync://}" + + # Extract host:port: specification from ccprefix, we don't + # need to care about :parallelize here, just pass it to run.sh + # if present. + rsync_spec=${ccprefix%:*} + rsync_host="$(echo $rsync_spec | cut -d: -f 1)" + case ${ccprefix} in + *:*:*) + rsync_port="$(echo $rsync_spec | cut -s -d: -f 2)" + ;; + *:*) + # If no port is specified, use 22 (ssh default port) + rsync_port=22 + ;; + esac # We want to access the remote toolchain via a container, to # avoid problems with the hosts's ssh server restrictions on the # number of simulaneous connexions. @@ -128,9 +146,10 @@ case "$toolchain_url" in # architecture as the machine pointed to by $toolchain_url). # Assume ccprefix looks like /path/bin/target-triplet-, and # compute 'path'. - src_toolchaindir=$(dirname "$(dirname ${ccprefix})") - toolchaindir="${WORKSPACE}/toolchain-${BUILD_NUMBER}" - rsync -az --delete "$src_toolchaindir/" "$toolchaindir/" + src_toolchaindir=$(dirname "$(dirname ${ccprefix##*:})") + toolchaindir="${WORKSPACE}/toolchain" + rsync -az --delete -e "ssh -p$rsync_port" \ + "$rsync_host:$src_toolchaindir/" "$toolchaindir/" ;; *) echo "ERROR: Cannot handle toolchain_url: $toolchain_url" @@ -147,56 +166,62 @@ case "$toolchain_type" in ;; esac +# In the ssh:// case, we have to perform the 'find' operations +# remotely. case "$toolchain_url" in - "http://"*|"https://"*|"rsync://"*|"ssh://"*) - - # In the ssh:// case, we have to perform the 'find' operations - # remotely. - case "$toolchain_url" in - "ssh://"*) - maybe_remote="ssh -p $build_container_port $build_container_host" - ;; - *) - maybe_remote="" - ;; - esac + "ssh://"*) + maybe_remote="ssh ${build_container_port:+-p$build_container_port} $build_container_host" + ;; + *) + maybe_remote="" + ;; +esac - case "$toolchain_type" in - "gnu"|"llvm") ;; - "auto") - if [ x"$($maybe_remote find "$toolchaindir" -path "*bin/*gcc" | wc -l)" != x"0" ]; then - toolchain_type="gnu" - elif [ x"$($maybe_remote find "$toolchaindir" -path "*bin/*clang" | wc -l)" != x"0" ]; then - toolchain_type="llvm" - else - echo "ERROR: Cannot autodetect toolchain type" - exit 1 - fi - ;; - esac +case "$toolchain_type" in + "gnu") ;; + "llvm") + if [ x"$($maybe_remote find "$toolchaindir" -path "*bin/*flang-new" | wc -l)" != x"0" ]; then + support_fortran_opt="--support_fortran" + fi + ;; + "auto") + if [ x"$($maybe_remote find "$toolchaindir" -path "*bin/*gcc" | wc -l)" != x"0" ]; then + toolchain_type="gnu" + elif [ x"$($maybe_remote find "$toolchaindir" -path "*bin/*clang" | wc -l)" != x"0" ]; then + toolchain_type="llvm" + else + echo "ERROR: Cannot autodetect toolchain type" + exit 1 + fi + ;; +esac +# Non-ssh:// cases have to copy the just-copied toolchain to +# the remote build container. For ssh://, we'll access the +# toolchain remotely. +case "$toolchain_url" in + "ssh://"*) ;; + *) case "$toolchain_type" in "gnu") ccname="gcc" ;; "llvm") ccname="clang" ;; esac + ccpath=$($maybe_remote find "$toolchaindir" -path "*bin/*$ccname") if [ "$(echo "$ccpath" | wc -w)" -ne 1 ]; then echo "ERROR: found more than one compiler: $ccpath" exit 1 fi - # Non-ssh:// cases have to copy the just-copied toolchain to - # the remote build container. For ssh://, we'll access the - # toolchain remotely. - case "$toolchain_url" in - "ssh://"*) ;; - *) - ccprefix=$(echo "$ccpath" | sed -e "s/$ccname\$//") - # Copy toolchain to the build container. - rsync -a --delete -e "ssh -p$build_container_port" "$toolchaindir/" "$build_container_host:$toolchaindir/" - ccprefix="$build_container_host:$build_container_port:$ccprefix" - ;; - esac + ccprefix=$(echo "$ccpath" | sed -e "s/$ccname\$//") + # Copy toolchain to the build container. + ssh ${build_container_port:+-p$build_container_port} \ + $build_container_host mkdir -p "$toolchaindir" + rsync -a --del -e "ssh ${build_container_port:+-p$build_container_port}" \ + "$toolchaindir/" "$build_container_host:$toolchaindir/" + if [ x"$builder" != x"bmk_board" ]; then + ccprefix="$build_container_host:$build_container_port:$ccprefix" + fi ;; esac @@ -207,7 +232,10 @@ case "$sysroot" in "http://"*|"https://"*) sysrootdir=$(untar_url "$sysroot" "$WORKSPACE" "--strip-components 1") # Copy toolchain to the build container. - rsync -a --delete -e "ssh -p$build_container_port" "$sysrootdir/" "$build_container_host:$sysrootdir/" + ssh ${build_container_port:+-p$build_container_port} \ + $build_container_host mkdir -p "$sysrootdir" + rsync -a --del -e "ssh ${build_container_port:+-p$build_container_port}" \ + "$sysrootdir/" "$build_container_host:$sysrootdir/" sysroot="$build_container_host:$build_container_port:$sysrootdir" ;; "ssh://"*) @@ -231,21 +259,23 @@ case "$sysroot" in exit 1 ;; esac +} -if echo "$results_id" | grep -q "\.\."; then - echo "ERROR: results_id should not escape /home/tcwg-benchmark/results* hierarchy; do not use \"..\"" - exit 1 -fi - -hw_tag="${results_id%%/*}" case "$hw_tag:$boardname:$image_arch" in + apm_32:*-apm-*:armhf) ;; + apm_64:*-apm-*:arm64) ;; sq_32:*-sq-*:armhf) ;; sq_64:*-sq-*:arm64) ;; + stm32:dev-*:amd64) ;; tk1_32:*-tk1-*:armhf) ;; tx1_64:*-tx1-*:arm64) ;; tx1_32:*-tx1-*:armhf) ;; + fx_32:*-fx-*:armhf) ;; + fx_64:*-fx-*:arm64) ;; + qc_32:*-qc-*:armhf) ;; + qc_64:*-qc-*:arm64) ;; *) - echo "ERROR: results_id does not start with a valid hw_tag: $hw_tag" + echo "ERROR: hw_tag parameter is not valid : $hw_tag" exit 1 ;; esac @@ -253,49 +283,96 @@ esac # Check that we can ssh to the board and rsync scripts. This ensures that # the board is online and filesystem is good condition. Try to reboot and/or # power-cycle the board as needed. -if $reboot; then - # 1. Try access after soft reboot - # 2. Try access after power-cycle - tries_left=2 -else - # 1. Try access without rebooting - # 2. Try access after soft reboot - # 3. Try access after power-cycle - tries_left=3 -fi +case "$hw_tag:$reboot" in + stm32:*) + # 1. If the host machine isn't available on the 1st try -- give up. + tries_left=1 + reboot=false + prepare_board=false + ;; + *:true) + # 1. Try access after soft reboot + # 2. Try access after power-cycle + tries_left=2 + ;; + *) + # 1. Try access without rebooting + # 2. Try access after soft reboot + # 3. Try access after power-cycle + tries_left=3 + ;; +esac force_power_cycle=false while [ $tries_left != 0 ]; do tries_left=$(($tries_left-1)) - if $reboot; then - if ! ssh "$boardname" true || $force_power_cycle; then + if timeout 1m ssh "$boardname" true; then + ssh_cmd="ssh" + wait_opts=() + elif timeout 1m ssh -p22 -lroot "$boardname" true; then + ssh_cmd="ssh -p22 -lroot" + wait_opts=(-p22 -lroot) + else + ssh_cmd="false" + wait_opts=(-p22 -lroot) + reboot=true + force_power_cycle=true + tries_left=0 + fi + + if $prepare_board; then + if ! $reboot; then + # Check board for kernel panics and reboot, if any. + dmesg_file="$boardname.dmesg-$(date +%s)" + timeout 1m $ssh_cmd "$boardname" dmesg -l emerg 2>&1 \ + | tee "$dmesg_file-emerg" + if [ x"$(cat "$dmesg_file-emerg" | wc -l)" != x"0" ]; then + reboot=true + timeout 1m $ssh_cmd "$boardname" dmesg 2>&1 \ + | tee "$dmesg_file" + else + # Remove empty dmesg reports, but keep non-empty ones for + # offline analysis -- e.g., to understand frequency and + # nature of kernel panics. + rm "$dmesg_file-emerg" + fi + fi + + if $force_power_cycle; then echo "Trying to power-cycle $boardname" ( pdu_name=$(echo "${boardname%.tcwglab}" \ | sed -e 's/^tcwg-bmk-/tcwg-/') nvidia-power-cycle.sh "$pdu_name" - wait_for_ssh_server "$boardname" 22 100 + wait_for_ssh_server "$boardname" 150 "${wait_opts[@]}" ) & wait $! || exit $EXTERNAL_FAIL echo "Successfully powered-cycled $boardname" - else + elif $reboot; then + echo "Trying to reboot $boardname" # Reboot the board. # Ping board every second (ServerAliveInterval=1) to avoid # waiting [default] 5min for ssh to break connection. - ssh -Snone -oServerAliveInterval=1 $boardname sudo /sbin/reboot \ - || true + $ssh_cmd -Snone -oServerAliveInterval=1 $boardname \ + sudo reboot || true + # Wait until the ssh server is ready sleep 30 # Give time to the board to shutdown - ret=0 - wait_for_ssh_server $boardname 22 100 || ret=$? + wait_for_ssh_server "$boardname" 150 "${wait_opts[@]}" & + ret=0 && wait $! || ret=$? if [ $ret != 0 ]; then - echo "SSH server did not respond after reboot, exiting." - exit $EXTERNAL_FAIL + echo "SSH server did not respond after reboot" fi fi fi - rsync -az --delete bmk-scripts/ "$boardname:bmk-scripts/" & + ( + if $prepare_board; then + $scripts/tcwg-update-bmk-containers.sh --board "$boardname" \ + --test_docker true + fi + rsync -az --del bmk-scripts/ "$boardname:bmk-scripts/" + ) & res=0 && wait $! || res=$? if [ $res = 0 ]; then break @@ -312,11 +389,6 @@ if [ $res != 0 ]; then exit $EXTERNAL_FAIL fi -case "$testmode" in - build|verify) input_size="test" ;; - benchmark) input_size="ref" ;; -esac - if $prepare_board; then # FIXME: Implement more configurations and checks: # disable swap @@ -324,9 +396,8 @@ if $prepare_board; then # check that there are no stray processes # test that taskset works remote_exec "$boardname:::-t -Snone" \ - sudo /usr/local/bin/benchmark.sh --hw_tag "$hw_tag" \ - --action start_board --verbose \ - --image "linaro/ci-$image_arch-tcwg-build-ubuntu:$bench_container_tag" & + sudo bmk-scripts/prepare-board.sh --hw_tag "$hw_tag" \ + --action start_board --verbose & res=0 && wait $! || res=$? if [ $res != 0 ]; then @@ -335,9 +406,13 @@ if $prepare_board; then fi fi +# Make sure to cleanup build container if something goes +# wrong when preparing the test environment +trap "cleanup_all_containers" EXIT + # Start a container to run the benchmarks in. # We install SPEC in /home/tcwg-benchmark, so bind-mount it as $WORKSPACE. -WORKSPACE=$HOME $scripts/start-container-docker.sh --session-host "$boardname" --arch "$image_arch" --distro "$bench_container_tag" --task bench --docker_opts "--privileged" --prefix run_ > run-container.sh & +WORKSPACE=$HOME $scripts/start-container-docker.sh --session-host "$boardname" --arch "$image_arch" --distro "$bench_container_tag" --task bench --security "--privileged" --prefix run_ > run-container.sh & res=0 && wait $! || res=$? if [ $res != 0 ]; then @@ -347,33 +422,61 @@ fi trap "cleanup_all_containers" EXIT . ./run-container.sh +declare -g run_container_host run_container_port + +prepare_toolchain + +case "$bench_list" in + coremark) + remote_exec "$run_container_host:$run_container_port:bmk-scripts:-t -Snone" \ + ./coremark.sh \ + --ccprefix "$ccprefix" \ + --cflags "$cflags" \ + --ldflags "$ldflags" \ + --forceinstall "true" \ + --resultsdest "${results_dest}/$boardname" \ + --verbose true + ;; + *) # any others keywords corresponds to spec2xxx (either 2006 or 2017) + case "$testmode" in + build) input_size="test" ;; + verify) input_size="train" ;; + benchmark) input_size="ref" ;; + esac + + #spec_config follows run_profile + case "$run_profile" in + serial) config="serial" ;; + parallel|parallel_*) config="parallel" ;; + esac -# vars are from run-container.sh sourced above -# shellcheck disable=SC2154 -remote_exec "$run_container_host:$run_container_port::-t -Snone" \ - bmk-scripts/run.sh \ - --bench "$bench_list" \ - --config "${BUILD_NUMBER}-$run_profile" \ - --cflags "$cflags" \ - --ldflags "$ldflags" \ - --ccprefix "$ccprefix" \ - --extension "$extension" \ - --hw_tag "$hw_tag" \ - --ignore_errors "$ignore_errors" \ - --input_size "$input_size" \ - --iterations "$iterations" \ - --run_profile "$run_profile" \ - ${sysroot:+--sysroot "$sysroot"} \ - --toolchain "$toolchain_type" \ - --resultsdest "bkp-01.tcwglab:/home/tcwg-benchmark/results-${results_id}/$boardname" \ - --nodename "$boardname" \ - --forceinstall "${forceinstall}" \ - ${clean_older_than:+--clean_older_than "$clean_older_than"} \ - --verbose true + remote_exec "$run_container_host:$run_container_port::-t -Snone" \ + bmk-scripts/run.sh \ + --bench "$bench_list" \ + --config "$config" \ + --cflags "$cflags" \ + --ldflags "$ldflags" \ + --ccprefix "$ccprefix" \ + --extension "$extension" \ + --hw_tag "$hw_tag" \ + --ignore_errors "$ignore_errors" \ + --input_size "$input_size" \ + --iterations "$iterations" \ + --run_profile "$run_profile" \ + ${sysroot:+--sysroot "$sysroot"} \ + --toolchain "$toolchain_type" \ + $support_fortran_opt \ + --resultsdest "${results_dest}/$boardname" \ + --nodename "$boardname" \ + --forceinstall "${forceinstall}" \ + ${clean_older_than:+--clean_older_than "$clean_older_than"} \ + --verbose true + ;; +esac if $prepare_board; then remote_exec "$boardname:::-t -Snone" \ - sudo /usr/local/bin/benchmark.sh --action stop_board --verbose & + sudo bmk-scripts/prepare-board.sh --action stop_board --verbose & res=0 && wait $! || res=$? if [ $res != 0 ]; then echo "Warning: prepare-board.sh did not finish cleanly" |