#!/bin/bash

set -euf -o pipefail

scripts=$(dirname $0)
# shellcheck source=jenkins-helpers.sh
. $scripts/jenkins-helpers.sh
# shellcheck source=round-robin.sh
. $scripts/round-robin.sh

convert_args_to_variables "$@"

obligatory_variables rr[ci_project] rr[ci_config]
declare -A rr

# Execution mode: build or bisect
rr[mode]="${rr[mode]-build}"

# Set custom revision for one of the projects, and use baseline revisions
# for all other projects.
rr[baseline_branch]="${rr[baseline_branch]-linaro-local/ci/${rr[ci_project]}/${rr[ci_config]}}"
rr[update_baseline]="${rr[update_baseline]-ignore}"
rr[top_artifacts]="${rr[top_artifacts]-$(pwd)/artifacts}"

# Store the date of the run.
rr[run_date]="$(date --utc --iso-8601=seconds)"

# {toolchain_name}-{toolchain_ver}-{target}-{bmk}-{cflags}
IFS=- read -a ci_config <<EOF
${rr[ci_config]}
EOF

# [Elided in source: code deriving rr[toolchain], rr[target], $cflags,
# $metric_id, etc. from ci_config, plus the start of benchmark (), which
# dispatches the tcwg-benchmark jenkins job and sets $build_num and
# $results_id. The here-document above and the function header below are
# reconstructed from the surviving code and the call site near the end of
# this script.]
benchmark ()
{
  (
  set -euf -o pipefail

  # The command capturing benchmark-build.log (elided above) appends one
  # extra line "..." to the console output.
  # Strip this last line.
  head -n -1 $run_step_artifacts/benchmark-build.log \
    > $run_step_artifacts/benchmark.log

  local build_status
  local build_ret
  while true; do
    # Ssh connection to ci.linaro.org occasionally drops. We need
    # to check whether benchmarking has finished, and, if not, continue
    # to watch its output. We detect that the job has finished if the last
    # line of console output is "Finished: ".
    build_status=$(tail -n 1 $run_step_artifacts/benchmark.log)
    case "$build_status" in
      "Finished: SUCCESS")
        build_ret=0
        break
        ;;
      "Finished: "*)
        echo "# Benchmarking infra is offline:" >> ${rr[top_artifacts]}/results
        echo "-$EXTERNAL_FAIL" >> ${rr[top_artifacts]}/results
        build_ret=1
        break
        ;;
    esac

    # After the ci.linaro.org update on 2021-10-11 the behavior of the
    # "console" command option has changed: before the update it exited
    # immediately for finished builds, and after the update "console"
    # hangs indefinitely for finished builds. We work around this by
    # using "timeout 1m".
    sleep 300
    (timeout 1m \
       ssh -p2222 -l $USER@linaro.org ci.linaro.org \
       console tcwg-benchmark $build_num || true) \
      | tee $run_step_artifacts/benchmark.log
  done

  echo "$results_id" | sed -e "s/@build_num@/$build_num/g" \
    > "$results_id_file"

  return $build_ret
  )
}

# Compare results, obtained from metric data, between $2 and $3 for
# metric $1, and generate results-compare.csv.
compare_results ()
{
  (
  set -euf -o pipefail
  local metric_id="$1"
  local ref_results_id="$2"
  local new_results_id="$3"
  local cmp_options="$4"

  local results_ref results_new
  results_ref=$(cat $ref_results_id)
  results_new=$(cat $new_results_id)

  case "${rr[target]}" in
    "arm_eabi") cmp_options="$cmp_options --has_perf_logs no" ;;
  esac

  $scripts/tcwg-benchmark-results.sh \
    --results_ref $results_ref ++results $results_new \
    --top_artifacts "$run_step_artifacts" --verbose $verbose $cmp_options \
    > $run_step_artifacts/results.log 2>&1 &
  local res
  res=0 && wait $! || res=$?
  if [ $res != 0 ]; then
    return $EXTERNAL_FAIL
  fi

  case "$metric_id:$cflags" in
    cflags:*"VECT"*) metric_id="vect" ;;
    cflags:"-Os"*|cflags:"-Oz"*) metric_id="size" ;;
    cflags:*) metric_id="time" ;;
  esac

  local regressed_by improved_by changed_by exe_threshold symbol_threshold
  case $metric_id in
    size)
      # We use 1% tolerance for binary size
      # and 10% tolerance for symbol size.
      exe_threshold=1
      symbol_threshold=10
      regressed_by="grew in size by"
      improved_by="reduced in size by"
      ;;
    time)
      # We use 3% tolerance for binary speed
      # and 15% tolerance for symbol speed.
      exe_threshold=3
      symbol_threshold=15
      # Reduce thresholds when bisecting to avoid considering borderline
      # regressions as spurious. This should break cycles of build and
      # bisect jobs triggering each other on borderline regressions.
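      # For example (hypothetical numbers): an executable going from 1000
      # to 1040 perf samples has relative value 104, i.e. metric = +4,
      # which exceeds the build-mode threshold of 3; a +3 change does not.
      # With the reduced bisect threshold of 2, a change measuring +4 in
      # the build job but only +3 when re-measured during bisection is
      # still flagged instead of being dismissed as spurious.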
if [ x"${rr[mode]}" = x"bisect" ]; then exe_threshold=2 symbol_threshold=10 fi regressed_by="slowed down by" improved_by="speeds up by" ;; vect) exe_threshold=0 symbol_threshold=0 regressed_by="reduced by" improved_by="increased up by" ;; *) assert false ;; esac local -a arr local metric bmk symbol rtime rsize rvect time1 time2 size1 size2 vect1 vect2 local long_diag short_symbol short_diag local result prev_bmk echo "bmk,symbol,result" > $run_step_artifacts/results-compare.csv printf "extra_build_params=" > $run_step_artifacts/extra-bisect-params assert_with_msg "Found stale regression files" \ [ x"$(find $run_step_artifacts/ -name "*.regression" | wc -l)" = x"0" ] # Read result lines from <(tail -n +2 ...) below. # "-n +2" is to skip the header line. prev_bmk="" while IFS=, read -a arr; do bmk=${arr[0]} symbol=${arr[1]} rtime=${arr[2]} rsize=${arr[3]} rvect=${arr[4]} # $arr[5] is used to store rel_symbol_md5sum, ignore it. time1=${arr[6]} time2=${arr[7]} size1=${arr[8]} size2=${arr[9]} vect1=${arr[10]} vect2=${arr[11]} md5sum1=${arr[12]} md5sum2=${arr[13]} case $metric_id in size) metric=$rsize ;; time) metric=$rtime ;; vect) metric=$rvect ;; *) assert false ;; esac # Skip processing time metric if md5sums match. # If either md5sum is -1, then it indicates that we don't have checksum # computed correctly for this symbol, and thus continue with comparison. if [ x"$md5sum1" == x"$md5sum2" ] \ && [ x"$md5sum1" != x"-1" ] \ && [ x"$metric_id" == x"time" ] \ && [ "$metric" != "n/a" ]; then if (( $metric != 0 )); then echo "warning: Samples differ for $symbol having same md5sums: $time1, $time2" fi # TODO: Fix md5sum processing. #continue fi # Skip case where we have no info ("n/a") if [ "$metric" != "n/a" ]; then metric=$(($metric - 100)) # For vect metric, relative value < 100 will be a regression. # So effectively for any metric, if $metric is positive, # then it's a regression. if [ x"$metric_id" = x"vect" ]; then metric=$((-$metric)) fi # Remove padding from the tail of $symbol (padding is added by # csvs2table.py for better formatting). short_symbol="$(echo "$symbol" | sed -e "s/ *\$//")" local bmk_exe case "$short_symbol" in "["*) bmk_exe=false ;; *"_base.default") bmk_exe=true ;; *) bmk_exe=false ;; esac local threshold neg_threshold if $bmk_exe; then threshold=$exe_threshold neg_threshold=$((-exe_threshold)) else threshold=$symbol_threshold neg_threshold=$((-symbol_threshold)) fi if [ "$metric" -gt "$threshold" ] || [ "$metric" -lt "$neg_threshold" ]; then if [ "$metric" -gt "$threshold" ]; then change_kind="regression" changed_by=$regressed_by else change_kind="improvement" changed_by=$improved_by fi # There's a regression result=100 case $metric_id in size) short_diag="$changed_by ${metric}%" long_diag="$short_diag from $size1 to $size2 bytes" ;; time) short_diag="$changed_by ${metric}%" long_diag="$short_diag from $time1 to $time2 perf samples" ;; vect) short_diag="$changed_by ${metric}%" long_diag="$short_diag from $vect1 to $vect2" ;; *) assert false ;; esac if $bmk_exe; then short_diag="$bmk $short_diag" long_diag="$bmk $long_diag" # Detect magic sample counts that indicate failure to build # and failure to run case "$time1:$time2" in 888888888:888888888|999999999:999999999) # Should never happen as we have neither # an improvement nor a regression. 
              assert false
              ;;
            *:999999999)
              change_kind="regression"
              short_diag="$bmk failed to build"
              long_diag="$short_diag"
              ;;
            999999999:888888888)
              change_kind="improvement"
              short_diag="$bmk built OK, but failed to run"
              long_diag="$short_diag"
              ;;
            *:888888888)
              change_kind="regression"
              short_diag="$bmk failed to run"
              long_diag="$short_diag"
              ;;
            888888888:*)
              change_kind="improvement"
              short_diag="$bmk run OK"
              long_diag="$short_diag"
              ;;
            999999999:*)
              change_kind="improvement"
              short_diag="$bmk built and run OK"
              long_diag="$short_diag"
              ;;
          esac
          echo "$metric,$bmk,$symbol,$short_diag,$long_diag" \
            >> $run_step_artifacts/exe.$change_kind
        else
          short_diag="$bmk:$short_symbol $short_diag"
          long_diag="$bmk:$short_symbol $long_diag"
          echo "$metric,$bmk,$symbol,$short_diag,$long_diag" \
            >> $run_step_artifacts/$bmk.symbols-$change_kind
        fi

        if [ x"$bmk" != x"$prev_bmk" ]; then
          printf "++benchmarks %s " $bmk \
            >> $run_step_artifacts/extra-bisect-params
          prev_bmk="$bmk"
        fi
      else
        result=1
      fi
      echo "$bmk,$symbol,$result" >> $run_step_artifacts/results-compare.csv
    fi
    # Read from results-internal.csv instead of results.csv, because
    # the latter may contain commas in demangled symbol names, which will
    # interfere with parsing.
  done < <(tail -n +2 $run_step_artifacts/results-internal.csv)
  printf "\n" >> $run_step_artifacts/extra-bisect-params

  # Comparison is done. Below we generate the regression report.
  # [Elided in source: a series of here-documents -- one creating
  # $run_step_artifacts/mail-body.txt, eight appending to it, and one
  # creating $run_step_artifacts/mail-subject.txt -- whose bodies are
  # missing, along with the "if" condition and the first write creating
  # $run_step_artifacts/results.regressions.]
    echo "# $short_diag" >> $run_step_artifacts/results.regressions
  fi
  )
}

# Exit with code 0 if there is no regression compared to base-artifacts/.
# Inspect build results in ./results and performance results in ./results_id.
no_regression_p ()
{
  (
  set -euf -o pipefail

  no_build_regression_p "$@"

  local ref_artifacts=$1
  local new_artifacts=$2

  if ! [ -f "$ref_artifacts/results_id" ]; then
    return 0
  fi

  # Return early (with no regression) when the reference results are among
  # the missing reference results, which we have listed in
  # tcwg-benchmark-results.broken-list. Once all entries referencing missing
  # results are discarded, we'll remove this workaround.
  # Otherwise compare_results will fail while fetching baseline results,
  # and we'll consider this failure as a regression.
  if cat "$scripts/tcwg-benchmark-results.broken-list" \
       | grep -q "^$(cat $ref_artifacts/results_id)\$"; then
    return 0
  fi

  if ! [ -f "$new_artifacts/results_id" ]; then
    return 1
  fi

  # Make sure there is no stray results.regressions file, which we use
  # as a failure marker.
  # We can, potentially, call ${rr[no_regression_p]} several times in
  # a row during the update_baseline() step, but we should stop at the first
  # regression. Therefore, we should never see results.regressions exist.
  assert ! [ -f $run_step_artifacts/results.regressions ]

  local compare_opts=""
  case "$cflags" in
    *"_LTO"*) compare_opts="--num_symbols 0 --entry_threshold 10" ;;
  esac

  compare_results "$metric_id" "$ref_artifacts/results_id" \
    "$new_artifacts/results_id" "$compare_opts"

  if [ -f $run_step_artifacts/results.regressions ]; then
    return 1
  fi

  return 0
  )
}

# Implement the rr[breakup_changed_components] hook.
tcwg_bmk_breakup_changed_components ()
{
  (
  set -euf -o pipefail

  # Compiler changes tend to cause the most regressions.
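  # For example (hypothetical component list): if binutils, gcc and glibc
  # all changed in a gnu configuration, this hook prints two lines --
  # "gcc" and "binutils glibc" -- so that the compiler is tried on its
  # own before the remaining components are tried together.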
  # Break up the updated components into the compiler and the rest of the
  # components to reduce the number of builds.
  local cc
  case "${rr[toolchain]}" in
    llvm) cc="llvm" ;;
    gnu|gnu_eabi) cc="gcc" ;;
    *) assert false ;;
  esac
  if print_changed_components "\n" | grep -q "^$cc\$"; then
    echo "$cc"
    print_changed_components "\n" | grep -v "^$cc\$" | tr '\n' ' ' \
      | sed -e "s/ \$//g"
    echo
  else
    print_changed_components "\n"
  fi
  )
}
rr[breakup_changed_components]=tcwg_bmk_breakup_changed_components

run_step stop_on_fail -10 reset_artifacts
run_step stop_on_fail x prepare_abe
run_step skip_on_fail -9 build_abe binutils
run_step skip_on_fail -8 build_abe stage1 -- "${gcc_override_configure[@]}"
run_step skip_on_fail x clean_sysroot
case "${rr[components]}" in
  *glibc*)
    run_step skip_on_fail -7 build_abe linux
    run_step skip_on_fail -6 build_abe glibc
    ;;
  *newlib*)
    run_step skip_on_fail -6 build_abe newlib
    ;;
esac
run_step skip_on_fail -5 build_abe stage2 -- "${gcc_override_configure[@]}"
case "${rr[toolchain]}" in
  llvm)
    run_step skip_on_fail -3 build_llvm true
    ;;
esac
run_step skip_on_fail 1 benchmark -- "$cflags" ${rr[top_artifacts]}/results_id
run_step reset_on_fail x check_regression
run_step stop_on_fail x update_baseline

trap "" EXIT
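# Example invocation (hypothetical values; assumes the "--var value"
# argument convention handled by convert_args_to_variables), where
# ci_config follows {toolchain_name}-{toolchain_ver}-{target}-{bmk}-{cflags}:
#   ./tcwg_bmk-build.sh \
#     --rr[ci_project] tcwg_bmk-gnu \
#     --rr[ci_config] gnu-master-aarch64-spec2k6-O2 \
#     --rr[mode] build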