diff options
Diffstat (limited to 'tcwg-benchmark.sh')
-rwxr-xr-x | tcwg-benchmark.sh | 142 |
1 files changed, 96 insertions, 46 deletions
diff --git a/tcwg-benchmark.sh b/tcwg-benchmark.sh index 77921a20..21e5fa9e 100755 --- a/tcwg-benchmark.sh +++ b/tcwg-benchmark.sh @@ -23,11 +23,12 @@ obligatory_variables \ sysroot \ forceinstall \ builder \ - results_id \ + results_dest \ WORKSPACE \ reboot \ ignore_errors \ - clean_older_than + clean_older_than \ + hw_tag declare -g \ boardname \ image_arch \ @@ -42,18 +43,20 @@ declare -g \ sysroot \ forceinstall \ builder \ - results_id \ + results_dest \ WORKSPACE \ reboot \ ignore_errors \ - clean_older_than + clean_older_than \ + hw_tag # Make shellcheck happy and workaround Jenkins not defining variables # for empty arguments. -bench_container_tag="${bench_container_tag-bionic}" +bench_container_tag="${bench_container_tag-default}" toolchain_type="${toolchain_type-auto}" prepare_board="${prepare_board-true}" verbose="${verbose-true}" +support_fortran_opt="" if $verbose; then set -x @@ -78,20 +81,12 @@ case "$toolchain_url" in *:*:*) build_container_port="$(echo $build | cut -s -d: -f 2)" ;; - *:*) - # If no port is specified, use 22 (ssh default port) - build_container_port=22 - ;; esac if [ "x$build_container_host" = "x" ]; then echo "ERROR: ssh:// toolchain_url lacks a host: $toolchain_url." exit 1 fi - if [ "x$build_container_port" = "x" ]; then - echo "ERROR: ssh:// toolchain_url lacks a port: $toolchain_url." - exit 1 - fi ;; *) if [ x"$builder" = x"bmk_board" ]; then @@ -102,7 +97,7 @@ case "$toolchain_url" in else build_container_tag="${builder#*:}" builder="${builder%:*}" - if echo "$builder" | grep -q ".*-[0-9]\+"; then + if echo "$builder" | grep ".*-[0-9]\+" >/dev/null; then # Builder is a specific node docker_host_opt="--arch amd64 --node $builder" else @@ -175,7 +170,7 @@ esac # remotely. case "$toolchain_url" in "ssh://"*) - maybe_remote="ssh -p $build_container_port $build_container_host" + maybe_remote="ssh ${build_container_port:+-p$build_container_port} $build_container_host" ;; *) maybe_remote="" @@ -183,7 +178,12 @@ case "$toolchain_url" in esac case "$toolchain_type" in - "gnu"|"llvm") ;; + "gnu") ;; + "llvm") + if [ x"$($maybe_remote find "$toolchaindir" -path "*bin/*flang-new" | wc -l)" != x"0" ]; then + support_fortran_opt="--support_fortran" + fi + ;; "auto") if [ x"$($maybe_remote find "$toolchaindir" -path "*bin/*gcc" | wc -l)" != x"0" ]; then toolchain_type="gnu" @@ -215,7 +215,10 @@ case "$toolchain_url" in ccprefix=$(echo "$ccpath" | sed -e "s/$ccname\$//") # Copy toolchain to the build container. - rsync -a --delete -e "ssh -p$build_container_port" "$toolchaindir/" "$build_container_host:$toolchaindir/" + ssh ${build_container_port:+-p$build_container_port} \ + $build_container_host mkdir -p "$toolchaindir" + rsync -a --del -e "ssh ${build_container_port:+-p$build_container_port}" \ + "$toolchaindir/" "$build_container_host:$toolchaindir/" if [ x"$builder" != x"bmk_board" ]; then ccprefix="$build_container_host:$build_container_port:$ccprefix" fi @@ -229,7 +232,10 @@ case "$sysroot" in "http://"*|"https://"*) sysrootdir=$(untar_url "$sysroot" "$WORKSPACE" "--strip-components 1") # Copy toolchain to the build container. - rsync -a --delete -e "ssh -p$build_container_port" "$sysrootdir/" "$build_container_host:$sysrootdir/" + ssh ${build_container_port:+-p$build_container_port} \ + $build_container_host mkdir -p "$sysrootdir" + rsync -a --del -e "ssh ${build_container_port:+-p$build_container_port}" \ + "$sysrootdir/" "$build_container_host:$sysrootdir/" sysroot="$build_container_host:$build_container_port:$sysrootdir" ;; "ssh://"*) @@ -255,12 +261,6 @@ case "$sysroot" in esac } -if echo "$results_id" | grep -q "\.\."; then - echo "ERROR: results_id should not escape /home/tcwg-benchmark/results* hierarchy; do not use \"..\"" - exit 1 -fi - -hw_tag="${results_id%%/*}" case "$hw_tag:$boardname:$image_arch" in apm_32:*-apm-*:armhf) ;; apm_64:*-apm-*:arm64) ;; @@ -270,8 +270,12 @@ case "$hw_tag:$boardname:$image_arch" in tk1_32:*-tk1-*:armhf) ;; tx1_64:*-tx1-*:arm64) ;; tx1_32:*-tx1-*:armhf) ;; + fx_32:*-fx-*:armhf) ;; + fx_64:*-fx-*:arm64) ;; + qc_32:*-qc-*:armhf) ;; + qc_64:*-qc-*:arm64) ;; *) - echo "ERROR: results_id does not start with a valid hw_tag: $hw_tag" + echo "ERROR: hw_tag parameter is not valid : $hw_tag" exit 1 ;; esac @@ -302,35 +306,73 @@ force_power_cycle=false while [ $tries_left != 0 ]; do tries_left=$(($tries_left-1)) - if $reboot; then - if ! ssh "$boardname" true || $force_power_cycle; then + if timeout 1m ssh "$boardname" true; then + ssh_cmd="ssh" + wait_opts=() + elif timeout 1m ssh -p22 -lroot "$boardname" true; then + ssh_cmd="ssh -p22 -lroot" + wait_opts=(-p22 -lroot) + else + ssh_cmd="false" + wait_opts=(-p22 -lroot) + reboot=true + force_power_cycle=true + tries_left=0 + fi + + if $prepare_board; then + if ! $reboot; then + # Check board for kernel panics and reboot, if any. + dmesg_file="$boardname.dmesg-$(date +%s)" + timeout 1m $ssh_cmd "$boardname" dmesg -l emerg 2>&1 \ + | tee "$dmesg_file-emerg" + if [ x"$(cat "$dmesg_file-emerg" | wc -l)" != x"0" ]; then + reboot=true + timeout 1m $ssh_cmd "$boardname" dmesg 2>&1 \ + | tee "$dmesg_file" + else + # Remove empty dmesg reports, but keep non-empty ones for + # offline analysis -- e.g., to understand frequency and + # nature of kernel panics. + rm "$dmesg_file-emerg" + fi + fi + + if $force_power_cycle; then echo "Trying to power-cycle $boardname" ( pdu_name=$(echo "${boardname%.tcwglab}" \ | sed -e 's/^tcwg-bmk-/tcwg-/') nvidia-power-cycle.sh "$pdu_name" - wait_for_ssh_server "$boardname" 22 100 + wait_for_ssh_server "$boardname" 150 "${wait_opts[@]}" ) & wait $! || exit $EXTERNAL_FAIL echo "Successfully powered-cycled $boardname" - else + elif $reboot; then + echo "Trying to reboot $boardname" # Reboot the board. # Ping board every second (ServerAliveInterval=1) to avoid # waiting [default] 5min for ssh to break connection. - ssh -Snone -oServerAliveInterval=1 $boardname sudo /sbin/reboot \ - || true + $ssh_cmd -Snone -oServerAliveInterval=1 $boardname \ + sudo reboot || true + # Wait until the ssh server is ready sleep 30 # Give time to the board to shutdown - ret=0 - wait_for_ssh_server $boardname 22 100 || ret=$? + wait_for_ssh_server "$boardname" 150 "${wait_opts[@]}" & + ret=0 && wait $! || ret=$? if [ $ret != 0 ]; then - echo "SSH server did not respond after reboot, exiting." - exit $EXTERNAL_FAIL + echo "SSH server did not respond after reboot" fi fi fi - rsync -az --delete bmk-scripts/ "$boardname:bmk-scripts/" & + ( + if $prepare_board; then + $scripts/tcwg-update-bmk-containers.sh --board "$boardname" \ + --test_docker true + fi + rsync -az --del bmk-scripts/ "$boardname:bmk-scripts/" + ) & res=0 && wait $! || res=$? if [ $res = 0 ]; then break @@ -354,9 +396,8 @@ if $prepare_board; then # check that there are no stray processes # test that taskset works remote_exec "$boardname:::-t -Snone" \ - sudo /usr/local/bin/benchmark.sh --hw_tag "$hw_tag" \ - --action start_board --verbose \ - --image "linaro/ci-$image_arch-tcwg-build-ubuntu:$bench_container_tag" & + sudo bmk-scripts/prepare-board.sh --hw_tag "$hw_tag" \ + --action start_board --verbose & res=0 && wait $! || res=$? if [ $res != 0 ]; then @@ -371,7 +412,7 @@ trap "cleanup_all_containers" EXIT # Start a container to run the benchmarks in. # We install SPEC in /home/tcwg-benchmark, so bind-mount it as $WORKSPACE. -WORKSPACE=$HOME $scripts/start-container-docker.sh --session-host "$boardname" --arch "$image_arch" --distro "$bench_container_tag" --task bench --docker_opts "--privileged" --prefix run_ > run-container.sh & +WORKSPACE=$HOME $scripts/start-container-docker.sh --session-host "$boardname" --arch "$image_arch" --distro "$bench_container_tag" --task bench --security "--privileged" --prefix run_ > run-container.sh & res=0 && wait $! || res=$? if [ $res != 0 ]; then @@ -393,18 +434,26 @@ case "$bench_list" in --cflags "$cflags" \ --ldflags "$ldflags" \ --forceinstall "true" \ - --resultsdest "bkp-01.tcwglab:/home/tcwg-benchmark/results-${results_id}/$boardname" \ + --resultsdest "${results_dest}/$boardname" \ --verbose true ;; - *) + *) # any others keywords corresponds to spec2xxx (either 2006 or 2017) case "$testmode" in - build|verify) input_size="test" ;; + build) input_size="test" ;; + verify) input_size="train" ;; benchmark) input_size="ref" ;; esac + + #spec_config follows run_profile + case "$run_profile" in + serial) config="serial" ;; + parallel|parallel_*) config="parallel" ;; + esac + remote_exec "$run_container_host:$run_container_port::-t -Snone" \ bmk-scripts/run.sh \ --bench "$bench_list" \ - --config "$run_profile" \ + --config "$config" \ --cflags "$cflags" \ --ldflags "$ldflags" \ --ccprefix "$ccprefix" \ @@ -416,7 +465,8 @@ case "$bench_list" in --run_profile "$run_profile" \ ${sysroot:+--sysroot "$sysroot"} \ --toolchain "$toolchain_type" \ - --resultsdest "bkp-01.tcwglab:/home/tcwg-benchmark/results-${results_id}/$boardname" \ + $support_fortran_opt \ + --resultsdest "${results_dest}/$boardname" \ --nodename "$boardname" \ --forceinstall "${forceinstall}" \ ${clean_older_than:+--clean_older_than "$clean_older_than"} \ @@ -426,7 +476,7 @@ esac if $prepare_board; then remote_exec "$boardname:::-t -Snone" \ - sudo /usr/local/bin/benchmark.sh --action stop_board --verbose & + sudo bmk-scripts/prepare-board.sh --action stop_board --verbose & res=0 && wait $! || res=$? if [ $res != 0 ]; then echo "Warning: prepare-board.sh did not finish cleanly" |