summary refs log tree commit diff
path: root/tcwg-benchmark.sh
diff options
context:
space:
mode:
Diffstat (limited to 'tcwg-benchmark.sh')
-rwxr-xr-x tcwg-benchmark.sh 142
1 files changed, 96 insertions, 46 deletions
diff --git a/tcwg-benchmark.sh b/tcwg-benchmark.sh
index 77921a20..21e5fa9e 100755
--- a/tcwg-benchmark.sh
+++ b/tcwg-benchmark.sh
@@ -23,11 +23,12 @@ obligatory_variables \
sysroot \
forceinstall \
builder \
- results_id \
+ results_dest \
WORKSPACE \
reboot \
ignore_errors \
- clean_older_than
+ clean_older_than \
+ hw_tag
declare -g \
boardname \
image_arch \
@@ -42,18 +43,20 @@ declare -g \
sysroot \
forceinstall \
builder \
- results_id \
+ results_dest \
WORKSPACE \
reboot \
ignore_errors \
- clean_older_than
+ clean_older_than \
+ hw_tag
# Make shellcheck happy and workaround Jenkins not defining variables
# for empty arguments.
-bench_container_tag="${bench_container_tag-bionic}"
+bench_container_tag="${bench_container_tag-default}"
toolchain_type="${toolchain_type-auto}"
prepare_board="${prepare_board-true}"
verbose="${verbose-true}"
+support_fortran_opt=""
if $verbose; then
set -x
@@ -78,20 +81,12 @@ case "$toolchain_url" in
*:*:*)
build_container_port="$(echo $build | cut -s -d: -f 2)"
;;
- *:*)
- # If no port is specified, use 22 (ssh default port)
- build_container_port=22
- ;;
esac
if [ "x$build_container_host" = "x" ]; then
echo "ERROR: ssh:// toolchain_url lacks a host: $toolchain_url."
exit 1
fi
- if [ "x$build_container_port" = "x" ]; then
- echo "ERROR: ssh:// toolchain_url lacks a port: $toolchain_url."
- exit 1
- fi
;;
*)
if [ x"$builder" = x"bmk_board" ]; then
@@ -102,7 +97,7 @@ case "$toolchain_url" in
else
build_container_tag="${builder#*:}"
builder="${builder%:*}"
- if echo "$builder" | grep -q ".*-[0-9]\+"; then
+ if echo "$builder" | grep ".*-[0-9]\+" >/dev/null; then
# Builder is a specific node
docker_host_opt="--arch amd64 --node $builder"
else
@@ -175,7 +170,7 @@ esac
# remotely.
case "$toolchain_url" in
"ssh://"*)
- maybe_remote="ssh -p $build_container_port $build_container_host"
+ maybe_remote="ssh ${build_container_port:+-p$build_container_port} $build_container_host"
;;
*)
maybe_remote=""
@@ -183,7 +178,12 @@ case "$toolchain_url" in
esac
case "$toolchain_type" in
- "gnu"|"llvm") ;;
+ "gnu") ;;
+ "llvm")
+ if [ x"$($maybe_remote find "$toolchaindir" -path "*bin/*flang-new" | wc -l)" != x"0" ]; then
+ support_fortran_opt="--support_fortran"
+ fi
+ ;;
"auto")
if [ x"$($maybe_remote find "$toolchaindir" -path "*bin/*gcc" | wc -l)" != x"0" ]; then
toolchain_type="gnu"
@@ -215,7 +215,10 @@ case "$toolchain_url" in
ccprefix=$(echo "$ccpath" | sed -e "s/$ccname\$//")
# Copy toolchain to the build container.
- rsync -a --delete -e "ssh -p$build_container_port" "$toolchaindir/" "$build_container_host:$toolchaindir/"
+ ssh ${build_container_port:+-p$build_container_port} \
+ $build_container_host mkdir -p "$toolchaindir"
+ rsync -a --del -e "ssh ${build_container_port:+-p$build_container_port}" \
+ "$toolchaindir/" "$build_container_host:$toolchaindir/"
if [ x"$builder" != x"bmk_board" ]; then
ccprefix="$build_container_host:$build_container_port:$ccprefix"
fi
@@ -229,7 +232,10 @@ case "$sysroot" in
"http://"*|"https://"*)
sysrootdir=$(untar_url "$sysroot" "$WORKSPACE" "--strip-components 1")
# Copy toolchain to the build container.
- rsync -a --delete -e "ssh -p$build_container_port" "$sysrootdir/" "$build_container_host:$sysrootdir/"
+ ssh ${build_container_port:+-p$build_container_port} \
+ $build_container_host mkdir -p "$sysrootdir"
+ rsync -a --del -e "ssh ${build_container_port:+-p$build_container_port}" \
+ "$sysrootdir/" "$build_container_host:$sysrootdir/"
sysroot="$build_container_host:$build_container_port:$sysrootdir"
;;
"ssh://"*)
@@ -255,12 +261,6 @@ case "$sysroot" in
esac
}
-if echo "$results_id" | grep -q "\.\."; then
- echo "ERROR: results_id should not escape /home/tcwg-benchmark/results* hierarchy; do not use \"..\""
- exit 1
-fi
-
-hw_tag="${results_id%%/*}"
case "$hw_tag:$boardname:$image_arch" in
apm_32:*-apm-*:armhf) ;;
apm_64:*-apm-*:arm64) ;;
@@ -270,8 +270,12 @@ case "$hw_tag:$boardname:$image_arch" in
tk1_32:*-tk1-*:armhf) ;;
tx1_64:*-tx1-*:arm64) ;;
tx1_32:*-tx1-*:armhf) ;;
+ fx_32:*-fx-*:armhf) ;;
+ fx_64:*-fx-*:arm64) ;;
+ qc_32:*-qc-*:armhf) ;;
+ qc_64:*-qc-*:arm64) ;;
*)
- echo "ERROR: results_id does not start with a valid hw_tag: $hw_tag"
+ echo "ERROR: hw_tag parameter is not valid : $hw_tag"
exit 1
;;
esac
@@ -302,35 +306,73 @@ force_power_cycle=false
while [ $tries_left != 0 ]; do
tries_left=$(($tries_left-1))
- if $reboot; then
- if ! ssh "$boardname" true || $force_power_cycle; then
+ if timeout 1m ssh "$boardname" true; then
+ ssh_cmd="ssh"
+ wait_opts=()
+ elif timeout 1m ssh -p22 -lroot "$boardname" true; then
+ ssh_cmd="ssh -p22 -lroot"
+ wait_opts=(-p22 -lroot)
+ else
+ ssh_cmd="false"
+ wait_opts=(-p22 -lroot)
+ reboot=true
+ force_power_cycle=true
+ tries_left=0
+ fi
+
+ if $prepare_board; then
+ if ! $reboot; then
+ # Check board for kernel panics and reboot, if any.
+ dmesg_file="$boardname.dmesg-$(date +%s)"
+ timeout 1m $ssh_cmd "$boardname" dmesg -l emerg 2>&1 \
+ | tee "$dmesg_file-emerg"
+ if [ x"$(cat "$dmesg_file-emerg" | wc -l)" != x"0" ]; then
+ reboot=true
+ timeout 1m $ssh_cmd "$boardname" dmesg 2>&1 \
+ | tee "$dmesg_file"
+ else
+ # Remove empty dmesg reports, but keep non-empty ones for
+ # offline analysis -- e.g., to understand frequency and
+ # nature of kernel panics.
+ rm "$dmesg_file-emerg"
+ fi
+ fi
+
+ if $force_power_cycle; then
echo "Trying to power-cycle $boardname"
(
pdu_name=$(echo "${boardname%.tcwglab}" \
| sed -e 's/^tcwg-bmk-/tcwg-/')
nvidia-power-cycle.sh "$pdu_name"
- wait_for_ssh_server "$boardname" 22 100
+ wait_for_ssh_server "$boardname" 150 "${wait_opts[@]}"
) &
wait $! || exit $EXTERNAL_FAIL
echo "Successfully powered-cycled $boardname"
- else
+ elif $reboot; then
+ echo "Trying to reboot $boardname"
# Reboot the board.
# Ping board every second (ServerAliveInterval=1) to avoid
# waiting [default] 5min for ssh to break connection.
- ssh -Snone -oServerAliveInterval=1 $boardname sudo /sbin/reboot \
- || true
+ $ssh_cmd -Snone -oServerAliveInterval=1 $boardname \
+ sudo reboot || true
+
# Wait until the ssh server is ready
sleep 30 # Give time to the board to shutdown
- ret=0
- wait_for_ssh_server $boardname 22 100 || ret=$?
+ wait_for_ssh_server "$boardname" 150 "${wait_opts[@]}" &
+ ret=0 && wait $! || ret=$?
if [ $ret != 0 ]; then
- echo "SSH server did not respond after reboot, exiting."
- exit $EXTERNAL_FAIL
+ echo "SSH server did not respond after reboot"
fi
fi
fi
- rsync -az --delete bmk-scripts/ "$boardname:bmk-scripts/" &
+ (
+ if $prepare_board; then
+ $scripts/tcwg-update-bmk-containers.sh --board "$boardname" \
+ --test_docker true
+ fi
+ rsync -az --del bmk-scripts/ "$boardname:bmk-scripts/"
+ ) &
res=0 && wait $! || res=$?
if [ $res = 0 ]; then
break
@@ -354,9 +396,8 @@ if $prepare_board; then
# check that there are no stray processes
# test that taskset works
remote_exec "$boardname:::-t -Snone" \
- sudo /usr/local/bin/benchmark.sh --hw_tag "$hw_tag" \
- --action start_board --verbose \
- --image "linaro/ci-$image_arch-tcwg-build-ubuntu:$bench_container_tag" &
+ sudo bmk-scripts/prepare-board.sh --hw_tag "$hw_tag" \
+ --action start_board --verbose &
res=0 && wait $! || res=$?
if [ $res != 0 ]; then
@@ -371,7 +412,7 @@ trap "cleanup_all_containers" EXIT
# Start a container to run the benchmarks in.
# We install SPEC in /home/tcwg-benchmark, so bind-mount it as $WORKSPACE.
-WORKSPACE=$HOME $scripts/start-container-docker.sh --session-host "$boardname" --arch "$image_arch" --distro "$bench_container_tag" --task bench --docker_opts "--privileged" --prefix run_ > run-container.sh &
+WORKSPACE=$HOME $scripts/start-container-docker.sh --session-host "$boardname" --arch "$image_arch" --distro "$bench_container_tag" --task bench --security "--privileged" --prefix run_ > run-container.sh &
res=0 && wait $! || res=$?
if [ $res != 0 ]; then
@@ -393,18 +434,26 @@ case "$bench_list" in
--cflags "$cflags" \
--ldflags "$ldflags" \
--forceinstall "true" \
- --resultsdest "bkp-01.tcwglab:/home/tcwg-benchmark/results-${results_id}/$boardname" \
+ --resultsdest "${results_dest}/$boardname" \
--verbose true
;;
- *)
+ *) # any others keywords corresponds to spec2xxx (either 2006 or 2017)
case "$testmode" in
- build|verify) input_size="test" ;;
+ build) input_size="test" ;;
+ verify) input_size="train" ;;
benchmark) input_size="ref" ;;
esac
+
+ #spec_config follows run_profile
+ case "$run_profile" in
+ serial) config="serial" ;;
+ parallel|parallel_*) config="parallel" ;;
+ esac
+
remote_exec "$run_container_host:$run_container_port::-t -Snone" \
bmk-scripts/run.sh \
--bench "$bench_list" \
- --config "$run_profile" \
+ --config "$config" \
--cflags "$cflags" \
--ldflags "$ldflags" \
--ccprefix "$ccprefix" \
@@ -416,7 +465,8 @@ case "$bench_list" in
--run_profile "$run_profile" \
${sysroot:+--sysroot "$sysroot"} \
--toolchain "$toolchain_type" \
- --resultsdest "bkp-01.tcwglab:/home/tcwg-benchmark/results-${results_id}/$boardname" \
+ $support_fortran_opt \
+ --resultsdest "${results_dest}/$boardname" \
--nodename "$boardname" \
--forceinstall "${forceinstall}" \
${clean_older_than:+--clean_older_than "$clean_older_than"} \
@@ -426,7 +476,7 @@ esac
if $prepare_board; then
remote_exec "$boardname:::-t -Snone" \
- sudo /usr/local/bin/benchmark.sh --action stop_board --verbose &
+ sudo bmk-scripts/prepare-board.sh --action stop_board --verbose &
res=0 && wait $! || res=$?
if [ $res != 0 ]; then
echo "Warning: prepare-board.sh did not finish cleanly"