<![CDATA[#!/bin/bash

# Clean: shellcheck -e 2001 ./tcwg-benchmark.sh

set -eu

scripts=$(dirname "$0")
# shellcheck source=jenkins-helpers.sh
. $scripts/jenkins-helpers.sh

convert_args_to_variables "$@"

obligatory_variables \
    boardname \
    image_arch \
    toolchain_url \
    bench_list \
    cflags \
    ldflags \
    extension \
    testmode \
    iterations \
    run_profile \
    sysroot \
    forceinstall \
    builder \
    results_dest \
    WORKSPACE \
    reboot \
    ignore_errors \
    clean_older_than \
    hw_tag
declare -g \
    boardname \
    image_arch \
    toolchain_url \
    bench_list \
    cflags \
    ldflags \
    extension \
    testmode \
    iterations \
    run_profile \
    sysroot \
    forceinstall \
    builder \
    results_dest \
    WORKSPACE \
    reboot \
    ignore_errors \
    clean_older_than \
    hw_tag

# Make shellcheck happy and work around Jenkins not defining variables
# for empty arguments.
bench_container_tag="${bench_container_tag-default}"
toolchain_type="${toolchain_type-auto}"
prepare_board="${prepare_board-true}"
verbose="${verbose-true}"
support_fortran_opt=""

if $verbose; then
    set -x
fi

prepare_toolchain ()
{
    # If $toolchain_url is of ssh:// type, don't create a remote build
    # container, just use the ssh command as provided.
    build_container_host=
    build_container_port=
    case "$toolchain_url" in
        "ssh://"*)
            ccprefix="${toolchain_url##ssh://}"
            # Extract host:port: specification from ccprefix; we don't
            # need to care about :parallelize here, just pass it to run.sh
            # if present.
            build=${ccprefix%:*}
            build_container_host="$(echo $build | cut -d: -f 1)"
            case ${ccprefix} in
                *:*:*)
                    build_container_port="$(echo $build | cut -s -d: -f 2)"
                    ;;
            esac
            if [ "x$build_container_host" = "x" ]; then
                echo "ERROR: ssh:// toolchain_url lacks a host: $toolchain_url."
                exit 1
            fi
            ;;
        *)
            if [ x"$builder" = x"bmk_board" ]; then
                # shellcheck disable=SC2154
                build_container_host=$run_container_host
                # shellcheck disable=SC2154
                build_container_port=$run_container_port
            else
                build_container_tag="${builder#*:}"
                builder="${builder%:*}"

                if echo "$builder" | grep ".*-[0-9]\+" >/dev/null; then
                    # Builder is a specific node.
                    docker_host_opt="--arch amd64 --node $builder"
                else
                    docker_host_opt="--label $builder"
                fi

                $scripts/start-container-docker.sh $docker_host_opt --distro "$build_container_tag" --task build --prefix build_ > build-container.sh
                . ./build-container.sh
            fi
            ;;
    esac

    case "$toolchain_url" in
        "ssh://"*)
            if [ x"$sysroot" = x"tarball" ]; then
                echo "ERROR: Unsupported sysroot $sysroot for toolchain_url $toolchain_url"
                exit 1
            fi
            # Last component of ccprefix is the path, keep it.
            toolchaindir="$(dirname ${ccprefix##*:})"
            ;;
        "http://"*".tar.xz"|"https://"*".tar.xz")
            toolchaindir=$(untar_url "$toolchain_url" "$WORKSPACE" "--strip-components 1")
            ;;
        "rsync://"*)
            ccprefix="${toolchain_url##rsync://}"
            # Extract host:port: specification from ccprefix; we don't
            # need to care about :parallelize here, just pass it to run.sh
            # if present.
            rsync_spec=${ccprefix%:*}
            rsync_host="$(echo $rsync_spec | cut -d: -f 1)"
            case ${ccprefix} in
                *:*:*)
                    rsync_port="$(echo $rsync_spec | cut -s -d: -f 2)"
                    ;;
                *:*)
                    # If no port is specified, use 22 (the ssh default port).
                    rsync_port=22
                    ;;
            esac
            # We want to access the remote toolchain via a container, to
            # avoid problems with the host's ssh server restrictions on the
            # number of simultaneous connections.
            # We copy it to the build container (assuming it uses the same
            # architecture as the machine pointed to by $toolchain_url).
            # Assume ccprefix looks like /path/bin/target-triplet-, and
            # compute 'path'.]]>
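<![CDATA[            # For example (hypothetical values), toolchain_url
            # rsync://build-01:22:/opt/toolchain/bin/aarch64-linux-gnu- gives
            # ccprefix "build-01:22:/opt/toolchain/bin/aarch64-linux-gnu-";
            # ${ccprefix##*:} keeps only the path, and the two dirname calls
            # below yield /opt/toolchain, which is mirrored into
            # ${WORKSPACE}/toolchain.]]>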
<![CDATA[            src_toolchaindir=$(dirname "$(dirname ${ccprefix##*:})")
            toolchaindir="${WORKSPACE}/toolchain"
            rsync -az --delete -e "ssh -p$rsync_port" \
                "$rsync_host:$src_toolchaindir/" "$toolchaindir/"
            ;;
        *)
            echo "ERROR: Cannot handle toolchain_url: $toolchain_url"
            exit 1
            ;;
    esac

    # Sanity check that toolchain_type is supported.
    case "$toolchain_type" in
        gnu|llvm|auto) ;;
        *)
            echo "ERROR: Unsupported toolchain type: $toolchain_type"
            exit 1
            ;;
    esac

    # In the ssh:// case, we have to perform the 'find' operations
    # remotely.
    case "$toolchain_url" in
        "ssh://"*)
            maybe_remote="ssh ${build_container_port:+-p$build_container_port} $build_container_host"
            ;;
        *)
            maybe_remote=""
            ;;
    esac

    case "$toolchain_type" in
        "gnu") ;;
        "llvm")
            if [ x"$($maybe_remote find "$toolchaindir" -path "*bin/*flang-new" | wc -l)" != x"0" ]; then
                support_fortran_opt="--support_fortran"
            fi
            ;;
        "auto")
            if [ x"$($maybe_remote find "$toolchaindir" -path "*bin/*gcc" | wc -l)" != x"0" ]; then
                toolchain_type="gnu"
            elif [ x"$($maybe_remote find "$toolchaindir" -path "*bin/*clang" | wc -l)" != x"0" ]; then
                toolchain_type="llvm"
            else
                echo "ERROR: Cannot autodetect toolchain type"
                exit 1
            fi
            ;;
    esac

    # Non-ssh:// cases have to copy the just-copied toolchain to
    # the remote build container. For ssh://, we'll access the
    # toolchain remotely.
    case "$toolchain_url" in
        "ssh://"*) ;;
        *)
            case "$toolchain_type" in
                "gnu") ccname="gcc" ;;
                "llvm") ccname="clang" ;;
            esac
            ccpath=$($maybe_remote find "$toolchaindir" -path "*bin/*$ccname")
            if [ "$(echo "$ccpath" | wc -w)" -ne 1 ]; then
                echo "ERROR: found more than one compiler: $ccpath"
                exit 1
            fi
            ccprefix=$(echo "$ccpath" | sed -e "s/$ccname\$//")
            # Copy toolchain to the build container.
            ssh ${build_container_port:+-p$build_container_port} \
                $build_container_host mkdir -p "$toolchaindir"
            rsync -a --del -e "ssh ${build_container_port:+-p$build_container_port}" \
                "$toolchaindir/" "$build_container_host:$toolchaindir/"
            if [ x"$builder" != x"bmk_board" ]; then
                ccprefix="$build_container_host:$build_container_port:$ccprefix"
            fi
            ;;
    esac

    case "$sysroot" in
        "tarball")
            sysroot="$build_container_host:$build_container_port:$(find "$toolchaindir" -name "libc")"
            ;;
        "http://"*|"https://"*)
            sysrootdir=$(untar_url "$sysroot" "$WORKSPACE" "--strip-components 1")
            # Copy sysroot to the build container.
            ssh ${build_container_port:+-p$build_container_port} \
                $build_container_host mkdir -p "$sysrootdir"
            rsync -a --del -e "ssh ${build_container_port:+-p$build_container_port}" \
                "$sysrootdir/" "$build_container_host:$sysrootdir/"
            sysroot="$build_container_host:$build_container_port:$sysrootdir"
            ;;
        "ssh://"*)
            sysroot="${sysroot##ssh://}"
            # Check host:port specification from sysroot.
            case ${sysroot} in
                *:*) ;;
                *)
                    echo "ERROR: ssh:// sysroot lacks a host: $sysroot"
                    exit 1
                    ;;
            esac
            ;;
        "")
            # Use system sysroot.
            ;;
        *)
            echo "ERROR: Cannot handle sysroot: $sysroot"
            exit 1
            ;;
    esac
}

case "$hw_tag:$boardname:$image_arch" in
    apm_32:*-apm-*:armhf) ;;
    apm_64:*-apm-*:arm64) ;;
    sq_32:*-sq-*:armhf) ;;
    sq_64:*-sq-*:arm64) ;;
    stm32:dev-*:amd64) ;;
    tk1_32:*-tk1-*:armhf) ;;
    tx1_64:*-tx1-*:arm64) ;;
    tx1_32:*-tx1-*:armhf) ;;
    fx_32:*-fx-*:armhf) ;;
    fx_64:*-fx-*:arm64) ;;
    qc_32:*-qc-*:armhf) ;;
    qc_64:*-qc-*:arm64) ;;
    *)
        echo "ERROR: hw_tag parameter is not valid: $hw_tag"
        exit 1
        ;;
esac

# Check that we can ssh to the board and rsync scripts. This ensures that
# the board is online and the filesystem is in good condition. Try to reboot
# and/or power-cycle the board as needed.
case "$hw_tag:$reboot" in
    stm32:*)
        # 1. If the host machine isn't available on the 1st try -- give up.]]>
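<![CDATA[        # (stm32 runs use an amd64 dev-* host as $boardname -- see the
        # hw_tag check above -- so reboot, power-cycle, and board
        # preparation are skipped.)]]>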
<![CDATA[        tries_left=1
        reboot=false
        prepare_board=false
        ;;
    *:true)
        # 1. Try access after soft reboot
        # 2. Try access after power-cycle
        tries_left=2
        ;;
    *)
        # 1. Try access without rebooting
        # 2. Try access after soft reboot
        # 3. Try access after power-cycle
        tries_left=3
        ;;
esac

force_power_cycle=false
while [ $tries_left != 0 ]; do
    tries_left=$(($tries_left-1))

    if timeout 1m ssh "$boardname" true; then
        ssh_cmd="ssh"
        wait_opts=()
    elif timeout 1m ssh -p22 -lroot "$boardname" true; then
        ssh_cmd="ssh -p22 -lroot"
        wait_opts=(-p22 -lroot)
    else
        ssh_cmd="false"
        wait_opts=(-p22 -lroot)
        reboot=true
        force_power_cycle=true
        tries_left=0
    fi

    if $prepare_board; then
        if ! $reboot; then
            # Check board for kernel panics and reboot, if any.
            dmesg_file="$boardname.dmesg-$(date +%s)"
            timeout 1m $ssh_cmd "$boardname" dmesg -l emerg 2>&1 \
                | tee "$dmesg_file-emerg"
            if [ x"$(cat "$dmesg_file-emerg" | wc -l)" != x"0" ]; then
                reboot=true
                timeout 1m $ssh_cmd "$boardname" dmesg 2>&1 \
                    | tee "$dmesg_file"
            else
                # Remove empty dmesg reports, but keep non-empty ones for
                # offline analysis -- e.g., to understand frequency and
                # nature of kernel panics.
                rm "$dmesg_file-emerg"
            fi
        fi

        if $force_power_cycle; then
            echo "Trying to power-cycle $boardname"
            (
                pdu_name=$(echo "${boardname%.tcwglab}" \
                    | sed -e 's/^tcwg-bmk-/tcwg-/')
                nvidia-power-cycle.sh "$pdu_name"
                wait_for_ssh_server "$boardname" 150 "${wait_opts[@]}"
            ) &
            wait $! || exit $EXTERNAL_FAIL
            echo "Successfully power-cycled $boardname"
        elif $reboot; then
            echo "Trying to reboot $boardname"
            # Reboot the board.
            # Ping the board every second (ServerAliveInterval=1) to avoid
            # waiting [default] 5min for ssh to break the connection.
            $ssh_cmd -Snone -oServerAliveInterval=1 $boardname \
                sudo reboot || true
            # Wait until the ssh server is ready.
            sleep 30 # Give the board time to shut down.
            wait_for_ssh_server "$boardname" 150 "${wait_opts[@]}" &
            ret=0 && wait $! || ret=$?
            if [ $ret != 0 ]; then
                echo "SSH server did not respond after reboot"
            fi
        fi
    fi

    (
        if $prepare_board; then
            $scripts/tcwg-update-bmk-containers.sh --board "$boardname" \
                --test_docker true
        fi
        rsync -az --del bmk-scripts/ "$boardname:bmk-scripts/"
    ) &
    res=0 && wait $! || res=$?
    if [ $res = 0 ]; then
        break
    else
        reboot=true
        if [ x"$tries_left" = x"1" ]; then
            force_power_cycle=true
        fi
    fi
done

if [ $res != 0 ]; then
    echo "ERROR: Could not get board online"
    exit $EXTERNAL_FAIL
fi

if $prepare_board; then
    # FIXME: Implement more configurations and checks:
    # - disable swap
    # - set interrupt affinity
    # - check that there are no stray processes
    # - test that taskset works
    remote_exec "$boardname:::-t -Snone" \
        sudo bmk-scripts/prepare-board.sh --hw_tag "$hw_tag" \
        --action start_board --verbose &
    res=0 && wait $! || res=$?
    if [ $res != 0 ]; then
        echo "ERROR: Could not prepare board for benchmarking"
        exit $EXTERNAL_FAIL
    fi
fi

# Make sure to clean up the build container if something goes
# wrong when preparing the test environment.
trap "cleanup_all_containers" EXIT

# Start a container to run the benchmarks in.
# We install SPEC in /home/tcwg-benchmark, so bind-mount it as $WORKSPACE.
WORKSPACE=$HOME $scripts/start-container-docker.sh --session-host "$boardname" \
    --arch "$image_arch" --distro "$bench_container_tag" --task bench \
    --security "--privileged" --prefix run_ > run-container.sh &
res=0 && wait $! || res=$?
if [ $res != 0 ]; then
    echo "ERROR: Could not start benchmarking container"
    exit $EXTERNAL_FAIL
fi
trap "cleanup_all_containers" EXIT]]>
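<![CDATA[# run-container.sh was generated above by start-container-docker.sh with
# "--prefix run_"; sourcing it is expected to define run_container_host and
# run_container_port, which prepare_toolchain and remote_exec use to reach
# the benchmark container.]]>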
<![CDATA[. ./run-container.sh
declare -g run_container_host run_container_port

prepare_toolchain

case "$bench_list" in
    coremark)
        remote_exec "$run_container_host:$run_container_port:bmk-scripts:-t -Snone" \
            ./coremark.sh \
            --ccprefix "$ccprefix" \
            --cflags "$cflags" \
            --ldflags "$ldflags" \
            --forceinstall "true" \
            --resultsdest "${results_dest}/$boardname" \
            --verbose true
        ;;
    *)
        # Any other keyword corresponds to spec2xxx (either 2006 or 2017).
        case "$testmode" in
            build) input_size="test" ;;
            verify) input_size="train" ;;
            benchmark) input_size="ref" ;;
        esac

        # spec_config follows run_profile.
        case "$run_profile" in
            serial) config="serial" ;;
            parallel|parallel_*) config="parallel" ;;
        esac

        remote_exec "$run_container_host:$run_container_port::-t -Snone" \
            bmk-scripts/run.sh \
            --bench "$bench_list" \
            --config "$config" \
            --cflags "$cflags" \
            --ldflags "$ldflags" \
            --ccprefix "$ccprefix" \
            --extension "$extension" \
            --hw_tag "$hw_tag" \
            --ignore_errors "$ignore_errors" \
            --input_size "$input_size" \
            --iterations "$iterations" \
            --run_profile "$run_profile" \
            ${sysroot:+--sysroot "$sysroot"} \
            --toolchain "$toolchain_type" \
            $support_fortran_opt \
            --resultsdest "${results_dest}/$boardname" \
            --nodename "$boardname" \
            --forceinstall "${forceinstall}" \
            ${clean_older_than:+--clean_older_than "$clean_older_than"} \
            --verbose true
        ;;
esac

if $prepare_board; then
    remote_exec "$boardname:::-t -Snone" \
        sudo bmk-scripts/prepare-board.sh --action stop_board --verbose &
    res=0 && wait $! || res=$?
    if [ $res != 0 ]; then
        echo "Warning: prepare-board.sh did not finish cleanly"
    fi
fi]]>
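<![CDATA[# Note: the build and benchmark containers started above are cleaned up by
# the "cleanup_all_containers" EXIT trap installed earlier.]]>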