#!/bin/bash

set -euf -o pipefail

scripts=$(dirname $0)
# shellcheck source=jenkins-helpers.sh
. $scripts/jenkins-helpers.sh
# shellcheck source=round-robin.sh
. $scripts/round-robin.sh

convert_args_to_variables "$@"

obligatory_variables rr[ci_project] rr[ci_config] ssh_host ssh_port
declare -A rr

# Execution mode: baseline, bisect, jenkins-full
rr[mode]="${rr[mode]-baseline}"

# Set custom revision for one of the projects, and use baseline revisions
# for all other projects.
rr[baseline_branch]="${rr[baseline_branch]-linaro-local/ci/${rr[ci_project]}/${rr[ci_config]}}"
rr[update_baseline]="${rr[update_baseline]-update}"
rr[top_artifacts]="${rr[top_artifacts]-$(pwd)/artifacts}"

# Set metric to perf by default.
rr[metric]="${rr[metric]-perf}"

# {toolchain_name}-{toolchain_ver}-{target}-{bmk}-{cflags}
IFS=- read -a ci_config <<EOF
${rr[ci_config]}
EOF
rr[toolchain]=${rr[toolchain]-${ci_config[0]}}
rr[target]=${rr[target]-${ci_config[2]}}
benchmarks=("${benchmarks[@]-${ci_config[3]}}")
if [ x"${benchmarks[*]}" = x"default" ]; then
    benchmarks=("${ci_config[3]}")
fi
if ! test_array cflags; then
    ci_config=("${ci_config[@]:4}")
    # In ${ci_config[@]} we now have "-"-separated entries (due to IFS=- above).
    # We restore "-" in compiler flags when doing flags="$flags-$flag" below.
    # We use "_" to separate compiler options, and it is translated to " -"
    # in benchmark().
    cflags=()
    while [ ${#ci_config[@]} -ge 1 ]; do
	flags=""
	while [ ${#ci_config[@]} -ge 1 ]; do
	    flag="${ci_config[0]}"
	    ci_config=("${ci_config[@]:1}")
	    if [ x"$flag" = x"vs" ]; then
		break
	    fi
	    flags="$flags-$flag"
	done
	cflags+=("$flags")
    done
fi

gcc_mode=""
for i in $(seq 0 $(("${#cflags[@]}" - 1))); do
    cflags_mode=""
    if [[ x"${cflags[$i]}" == x*"VECT"* ]]; then
	rr[metric]="vect"
    fi

    case "${rr[target]}:${cflags[$i]}" in
	"arm:"*"mthumb"*) cflags_mode=thumb ;;
	"arm:"*"marm"*) cflags_mode=arm ;;
	"arm:-Os"*|"arm:-Oz"*)
	    cflags_mode=thumb
	    cflags[$i]="${cflags[$i]}_mthumb"
	    ;;
	"arm:"*)
	    cflags_mode=arm
	    cflags[$i]="${cflags[$i]}_marm"
	    ;;
	"arm_eabi:"*)
	    cflags[$i]="${cflags[$i]}_mthumb"
	    ;;
    esac
    if [ x"$gcc_mode" = x"" ]; then
	gcc_mode="$cflags_mode"
    elif [ x"$gcc_mode" != x"$cflags_mode" ]; then
	assert_with_msg "Unsupported arm/thumb configuration ${cflags[$(($i - 1))]} and ${cflags[$i]}" false
    fi
done

gcc_override_configure=()
# Set default ARM/Thumb mode for AArch32 compiler.  This ensures that libraries
# (Glibc, libgcc, libstdc++, libgfortran, etc.) are built in the "interesting"
# ISA.
case "$gcc_mode" in
    arm|thumb) gcc_override_configure+=("--set" "gcc_override_configure=--with-mode=$gcc_mode") ;;
esac
# Disable libsanitizer because it didn't build in April-June 2018.
# We need to test revisions during that time period for performance regressions.
gcc_override_configure+=("--set" "gcc_override_configure=--disable-libsanitizer")

# Build the right libs depending on the target. Hardcode the
# cpu/float-abit/mode to speed up toolchain builds: the alternative of
# building rmprofile multilibs takes a very long time. Setting the
# values here has the drawback that we have to build one toolchain per
# board type.
case "${rr[target]}" in
    "arm_eabi") gcc_override_configure+=("--set" "gcc_override_configure=--disable-multilib"
					 "--set" "gcc_override_configure=--with-cpu=cortex-m4"
					 "--set" "gcc_override_configure=--with-mode=thumb"
					 "--set" "gcc_override_configure=--with-float=hard"
					) ;;
esac

case "${rr[toolchain]}" in
    llvm)
      rr[components]="binutils gcc linux glibc llvm" ;;
    gnu)
      rr[components]="binutils gcc linux glibc" ;;
    gnu_eabi)
      rr[components]="binutils gcc newlib" ;;
    *) assert_with_msg "Unknown toolchain \"${rr[toolchain]}\"" false ;;
esac

# Use baseline branches by default.
for c in ${rr[components]}; do
    rr[${c}_git]=${rr[${c}_git]-baseline}
done

start_at="${start_at-default}"
finish_at="${finish_at-default}"
verbose="${verbose-true}"
verbose2="${verbose2-false}"

if $verbose2; then set -x; fi

trap print_traceback EXIT

# Set start and finish steps for different modes.
default_start_at=""
default_finish_at=""
case "${rr[mode]}" in
    "baseline")
	default_finish_at="update_baseline"
	;;
    "bisect")
	single_updated_component="$(print_single_updated_component)"
	case $single_updated_component in
	    binutils) default_start_at="build_abe-binutils" ;;
	    gcc) default_start_at="build_abe-stage1" ;;
	    linux|glibc) default_start_at="clean_sysroot" ;;
	    llvm) default_start_at="build_llvm-true" ;;
	    newlib) default_start_at="build_abe-newlib" ;;
	    *) assert_with_msg \
        "Invalid single updated component \"$single_updated_component\"" false
      ;;
	esac
	default_finish_at="check_regression"
	;;
    "jenkins-full") ;;
esac
if [ x"$start_at" = x"default" ]; then
    start_at="$default_start_at"
fi
if [ x"$finish_at" = x"default" ]; then
    finish_at="$default_finish_at"
fi

run_step_init "$start_at" "$finish_at" "${rr[top_artifacts]}" "$verbose"

benchmark ()
{
    (
    set -euf -o pipefail

    local bmk_cflags="$2"
    local results_id_file="$3"

    sanity_check_pwd

    # shellcheck disable=SC2115
    rm -rf "$(pwd)"/bin
    mkdir "$(pwd)"/bin

    local bmk_flags bmk_ldflags reboot run_profile
    bmk_flags="$(echo $bmk_cflags | sed -e "s/_/ -/g" -e "s/LTO/flto/g" \
					-e "s/VECT/fdump-tree-vect-details/g")"
    case "$bmk_cflags" in
	"-Os"*|"-Oz"*)
	    reboot=false
	    run_profile="parallel"
	    ;;
	*)
	    reboot=true
	    run_profile="serial"
	    ;;
    esac

    local bench_list bin cc gnu_host gnu_target sysroot toolchain
    gnu_host=$(print_gnu_target native)
    gnu_target=$(print_gnu_target ${rr[target]})
    sysroot="$(pwd)/abe/builds/destdir/$gnu_host/$gnu_target/libc"
    case "${rr[toolchain]}" in
	llvm)
	    local llvm_target
	    llvm_target=$(echo "$gnu_target" | sed -e "s/^arm-/armv7a-/")
	    bmk_flags="$bmk_flags --target=$llvm_target --sysroot=$sysroot"
	    bmk_ldflags="$bmk_flags"
	    # Use LLD for LLVM configurations.
	    # Also, BFD linker crashes for AArch32 LTO builds,
	    # see https://projects.linaro.org/browse/LLVM-562 .
	    case "$bmk_ldflags" in
		*"-fuse-ld="*) ;;
		*) bmk_ldflags="$bmk_ldflags -fuse-ld=lld" ;;
	    esac
	    bin="$(pwd)/llvm-install/bin"
	    cc="$bin/"
	    toolchain="llvm"
	    ;;
	gnu|gnu_eabi)
	    bmk_ldflags="$bmk_flags"
	    bin="$(pwd)/abe/builds/destdir/$gnu_host/bin"
	    cc="$bin/$gnu_target-"
	    toolchain="gnu"
	    ;;
    esac
    case "${rr[toolchain]}:${benchmarks[*]}" in
	llvm:spec2k6) bench_list="c_and_cxx" ;;
	gnu:spec2k6) bench_list="all" ;;
	llvm:spec2017) bench_list="spec2017_speed_nofortran" ;;
	gnu:spec2017) bench_list="spec2017_speed" ;;
	*) bench_list="${benchmarks[*]}" ;;
    esac
    # shellcheck disable=SC2154
    sysroot="ssh://$ssh_host:$ssh_port:$sysroot"

    local hw_tag
    case "${rr[ci_project]}:${rr[target]}" in
	*_apm_32*:*) hw_tag=apm_32 ;;
	*_apm_64*:*) hw_tag=apm_64 ;;
	*_apm*:arm*) hw_tag=apm_32 ;;
	*_apm*:aarch64) hw_tag=apm_64 ;;
	*_sq_32*:*) hw_tag=sq_32 ;;
	*_sq_64*:*) hw_tag=sq_64 ;;
	*_sq*:arm*) hw_tag=sq_32 ;;
	*_sq*:aarch64) hw_tag=sq_64 ;;
	*_tk1_32*:*) hw_tag=tk1_32 ;;
	*_tk1*:arm*) hw_tag=tk1_32 ;;
	*_tx1_32*:*) hw_tag=tx1_32 ;;
	*_tx1_64*:*) hw_tag=tx1_64 ;;
	*_tx1*:arm*) hw_tag=tx1_32 ;;
	*_tx1*:aarch64) hw_tag=tx1_64 ;;
	*_stm32*:arm*) hw_tag=stm32 ;;
	*) echo "ERROR: Unknown hw_tag for ${rr[ci_project]}:${rr[target]}"; exit 1 ;;
    esac

    local hw image_arch toolchain_proto
    toolchain_proto=ssh
    case "$hw_tag" in
	apm_32) hw=apm; image_arch=armhf ;;
	apm_64) hw=apm; image_arch=arm64 ;;
	sq_32) hw=sq; image_arch=armhf ;;
	sq_64) hw=sq; image_arch=arm64 ;;
	tk1_32) hw=tk1; image_arch=armhf ;;
	tx1_32) hw=tx1; image_arch=armhf ;;
	tx1_64) hw=tx1; image_arch=arm64 ;;
	stm32)
	    hw=stm32; image_arch=amd64
	    # When running benchmarks on stm32, we prefer to rsync the
	    # toolchain to the board's host machine -- dev-02.tcwglab.
	    toolchain_proto=rsync
	    ;;
	*) echo "ERROR: Unknown hw_tag $hw_tag"; exit 1 ;;
    esac

    local results_id="$hw_tag/${rr[ci_project]}/${rr[mode]}-${rr[ci_config]}/@build_num@"

    # shellcheck disable=SC2154
    remote_exec "ci.linaro.org:2222::-l $USER@linaro.org" \
		build tcwg-benchmark -f -v \
		-p bmk_hw=$hw \
		-p bench_list="$bench_list" \
		-p cflags="$bmk_flags" \
		-p ldflags="$bmk_ldflags" \
		-p testmode=benchmark \
		-p displaytag="${rr[ci_project]}/${rr[mode]}-${rr[ci_config]}" \
		-p ignore_errors=true \
		-p toolchain_url=$toolchain_proto://$ssh_host:$ssh_port:$cc \
		-p toolchain_type=$toolchain \
		-p sysroot="$sysroot" \
		-p results_id="$results_id" \
		-p reboot="$reboot" \
		-p run_profile="$run_profile" \
		-p image_arch="$image_arch" \
		${scripts_branch+-p scripts_branch="$scripts_branch"} \
		${bmk_branch+-p bmk_branch="$bmk_branch"} \
	| tee $run_step_artifacts/benchmark-build.log

    local build_num
    build_num=$(head -n1 $run_step_artifacts/benchmark-build.log \
		    | sed -e "s/Started.*#\([0-9]\+\).*/\1/")
    assert_with_msg "Benchmark build number should not be 0!" \
		    [ "$build_num" -gt "0" ]

    # The "build" output adds "Completed <build>" to the console output.
    # Strip this last line.
    head -n -1 $run_step_artifacts/benchmark-build.log \
	 > $run_step_artifacts/benchmark.log

    local build_status
    local build_ret
    while true; do
	# Ssh connection to ci.linaro.org occasionally drops.  We need
	# to check whether benchmarking has finished, and, if not, continue
	# to watch its output.  We detect that the job has finished if the last
	# line of console output is "Finished: <something>".
	build_status=$(tail -n 1 $run_step_artifacts/benchmark.log)
	case "$build_status" in
	    "Finished: SUCCESS")
	        build_ret=0
	        break
	        ;;
	    "Finished: "*)
		echo "# Benchmarking infra is offline:" >> ${rr[top_artifacts]}/results
		echo "-$EXTERNAL_FAIL" >> ${rr[top_artifacts]}/results
	        build_ret=1
	        break
	        ;;
	esac

	# After ci.linaro.org update on 2021-10-11 behavior of console command
	# option has changed: before the update it exited immediately for finished builds,
	# and after the update "console" hangs indefinitely for finished builds.
	# We workaround this by using "timeout 1m".
	sleep 300

	(timeout 1m \
		 ssh -p2222 -l $USER@linaro.org ci.linaro.org \
		 console tcwg-benchmark $build_num || true) \
	    | tee $run_step_artifacts/benchmark.log
    done

    echo "$results_id" | sed -e "s/@build_num@/$build_num/g" \
			     > "$results_id_file"
    return $build_ret
    )
}

# Compare results obtained from perf data between $1 and $2
# and generate results-compare.csv
compare_results_perf ()
{
    (
    set -euf -o pipefail

    local exe_threshold symbol_threshold
    case "${cflags[0]}" in
	"-Os"*|"-Oz"*)
	    # We use 1% tolerance for binary size
	    # and 10% tolerance for symbol size.
	    exe_threshold=1
	    symbol_threshold=10
	    ;;
	*)
	    # We use 3% tolerance for binary speed
	    # and 15% tolerance for symbol speed.
	    exe_threshold=3
	    symbol_threshold=15
	    # Reduce thresholds when bisecting to avoid considering borderline
	    # regressions as spurious.  This should break cycles of build and
	    # bisect jobs triggering each other on borderline regressions.
	    if [ x"${rr[mode]}" = x"bisect" ]; then
		exe_threshold=2
		symbol_threshold=10
	    fi
	    ;;
    esac

    local -a arr
    local metric bmk symbol rtime rsize time1 time2 size1 size2
    local regression short_symbol short_regression
    local result prev_bmk
    echo "bmk,symbol,result" > $run_step_artifacts/results-compare.csv
    printf "extra_build_params=" > $run_step_artifacts/extra-bisect-params

    assert_with_msg "Found stale regression files" \
		    [ x"$(find $run_step_artifacts/ -name "*.regression" | wc -l)" = x"0" ]

    local metric_id regressed_by
    case "${cflags[0]}" in
	"-Os"*|"-Oz"*)
	    metric_id="size"
	    regressed_by="grew in size by"
	    ;;
	*)
	    metric_id="time"
	    regressed_by="slowed down by"
	    ;;
    esac

    # Read result lines from <(tail -n +2 ...) below.
    # "-n +2" is to skip the header line.
    prev_bmk=""
    while IFS=, read -a arr; do
	bmk=${arr[0]}
	symbol=${arr[1]}
	rtime=${arr[2]}
	rsize=${arr[3]}
	time1=${arr[4]}
	time2=${arr[5]}
	size1=${arr[6]}
	size2=${arr[7]}

	case $metric_id in
	    size) metric=$rsize ;;
	    time) metric=$rtime ;;
	    *) assert false ;;
	esac

	# Skip case where we have no info ("n/a")
	if [ "$metric" != "n/a" ]; then
	    metric=$(($metric - 100))
	    # Remove padding from the tail of $symbol (padding is added by
	    # csvs2table.py for better formatting).
	    short_symbol="$(echo "$symbol" | sed -e "s/ *\$//")"

	    local bmk_exe
	    case "$short_symbol" in
		"["*) bmk_exe=false ;;
		*"_base.default") bmk_exe=true ;;
		*) bmk_exe=false ;;
	    esac

	    local threshold
	    if $bmk_exe; then
		threshold=$exe_threshold
	    else
		threshold=$symbol_threshold
	    fi

	    if ! [ "$metric" -le "$threshold" ]; then
		result=100

		case $metric_id in
		    size)
			short_regression="$regressed_by ${metric}%"
			regression="$short_regression from $size1 to $size2 bytes"
			;;
		    time)
			short_regression="$regressed_by ${metric}%"
			regression="$short_regression from $time1 to $time2 perf samples" ;;
		    *) assert false ;;
		esac
		if $bmk_exe; then
		    short_regression="$bmk $short_regression"
		    regression="$bmk $regression"
		    # Detect magic sample counts that indicate failure to build
		    # and failure to run
		    case "$time2" in
			888888888)
			    short_regression="$bmk failed to run correctly"
			    regression="$short_regression"
			    ;;
			999999999)
			    short_regression="$bmk failed to build"
			    regression="$short_regression"
			    ;;
		    esac
		    echo "$metric,$bmk,$symbol,$short_regression,$regression" >> $run_step_artifacts/exe.regressions
		else
		    short_regression="$bmk:$short_symbol $short_regression"
		    regression="$bmk:$short_symbol $regression"
		    echo "$metric,$bmk,$symbol,$short_regression,$regression" >> $run_step_artifacts/$bmk.regression
		fi
		if [ x"$bmk" != x"$prev_bmk" ]; then
		    printf "++benchmarks %s " $bmk >> $run_step_artifacts/extra-bisect-params
		    prev_bmk="$bmk"
		fi
	    else
		result=1
	    fi
	    echo "$bmk,$symbol,$result" >> $run_step_artifacts/results-compare.csv
	fi
    done < <(tail -n +2 $run_step_artifacts/results.csv)
    printf "\n" >> $run_step_artifacts/extra-bisect-params

    # Comparison is done.  Below we generate regression report.
    cat > $run_step_artifacts/jira-body.txt <<EOF
After \$COMMIT_COMPONENT \$COMMIT_LOG
EOF
    if [ -f $run_step_artifacts/exe.regressions ]; then
	sort -gr -o $run_step_artifacts/exe.regressions \
	     $run_step_artifacts/exe.regressions

	cat >> $run_step_artifacts/jira-body.txt <<EOF

the following benchmarks $regressed_by more than ${exe_threshold}%:
EOF
	local exe
	while IFS=, read metric exe symbol short_regression regression; do
	    cat >> $run_step_artifacts/jira-body.txt <<EOF
- $regression
EOF
	    if [ -f $run_step_artifacts/$exe.regression ]; then
		while IFS=, read metric bmk symbol short_regression regression; do
		    cat >> $run_step_artifacts/jira-body.txt <<EOF
  - $regression
EOF
		done < $run_step_artifacts/$exe.regression
		# Delete $bmk.regressions so that it doesn't show up
		# in symbol-regression loop below.
		rm $run_step_artifacts/$exe.regression
	    fi
	done < $run_step_artifacts/exe.regressions
    fi

    find $run_step_artifacts/ -name "*.regression" -print0 | xargs -0 cat \
	| sort -gr -o $run_step_artifacts/symbol.regressions
    if [ x"$(cat $run_step_artifacts/symbol.regressions)" = x"" ]; then
	# Delete empty file
	rm $run_step_artifacts/symbol.regressions
    fi

    if [ -f $run_step_artifacts/symbol.regressions ]; then
	cat >> $run_step_artifacts/jira-body.txt <<EOF

the following hot functions $regressed_by more than ${symbol_threshold}% (but their benchmarks $regressed_by less than ${exe_threshold}%):
EOF
	while IFS=, read metric bmk symbol short_regression regression; do
	    cat >> $run_step_artifacts/jira-body.txt <<EOF
- $regression
EOF
	done < $run_step_artifacts/symbol.regressions
    fi

    cp $run_step_artifacts/jira-body.txt $run_step_artifacts/mail-body.txt

    local bmk_suite="" publish_save_temps=false
    case "${benchmarks[*]}" in
	coremark) bmk_suite="EEMBC CoreMark" ;;
	spec2k6|4*)
	    bmk_suite="SPEC CPU2006"
	    publish_save_temps=true
	    ;;
	spec2017|5*|6*)
	    bmk_suite="SPEC CPU2017"
	    publish_save_temps=true
	    ;;
    esac

    cat >> $run_step_artifacts/mail-body.txt <<EOF

Below reproducer instructions can be used to re-build both "first_bad" and "last_good" cross-toolchains used in this bisection.  Naturally, the scripts will fail when triggerring benchmarking jobs if you don't have access to Linaro TCWG CI.
EOF

    # Copy save-temps tarballs to artifacts, so that they are accessible.
    # We can publish pre-processed source only for benchmarks derived from
    # open-source projects.
    # Note that we include save-temps artifacts for successful builds so that
    # "last_good" build has the artifacts.
    if $publish_save_temps; then
	mkdir -p $run_step_artifacts/top-artifacts
	local s_t
	while read s_t; do
	    rsync -a "$s_t/" $run_step_artifacts/top-artifacts/save-temps/
	done < <(find results-1 -type d -name "save.*.temps")
    fi

    if [ -d $run_step_artifacts/top-artifacts/save-temps/ ]; then
	cat >> $run_step_artifacts/mail-body.txt <<EOF

For your convenience, we have uploaded tarballs with pre-processed source and assembly files at:
- First_bad save-temps: \$FIRST_BAD_ARTIFACTS/save-temps/
- Last_good save-temps: \$LAST_GOOD_ARTIFACTS/save-temps/
- Baseline save-temps: \$BASELINE_ARTIFACTS/save-temps/
EOF
    fi

    local compiler="" libc="" linker="" version="" target="" bmk_flags="" hw=""
    case "${rr[toolchain]}" in
	gnu)
	    compiler="GCC"
	    libc="Glibc"
	    linker="GNU Linker"
	    ;;
	gnu_eabi)
	    compiler="GCC"
	    libc="Newlib"
	    linker="GNU LD"
	    ;;
	llvm)
	    compiler="Clang"
	    libc="Glibc"
	    linker="LLVM Linker"
	    ;;
    esac
    case "${rr[ci_config]}" in
	*-master-*) version="tip of trunk" ;;
	*-release-*) version="latest release branch" ;;
    esac
    target=$(print_gnu_target ${rr[target]})
    bmk_flags=$(echo "${cflags[0]}" | sed -e "s/_/ -/g" -e "s/LTO/flto/g" \
					  -e "s/VECT/fdump-tree-vect-details/g")
    case "${rr[ci_project]}" in
	*_apm*) hw="APM Mustang 8x X-Gene1" ;;
	*_tk1*) hw="NVidia TK1 4x Cortex-A15" ;;
	*_tx1*) hw="NVidia TX1 4x Cortex-A57" ;;
	*_stm32*) hw="STMicroelectronics STM32L476RGTx 1x Cortex-M4" ;;
    esac

    cat >> $run_step_artifacts/mail-body.txt <<EOF

Configuration:
- Benchmark: $bmk_suite
- Toolchain: $compiler + $libc + $linker
- Version: all components were built from their $version
- Target: $target
- Compiler flags: $bmk_flags
- Hardware: $hw

This benchmarking CI is work-in-progress, and we welcome feedback and suggestions at linaro-toolchain@lists.linaro.org .  In our improvement plans is to add support for SPEC CPU2017 benchmarks and provide "perf report/annotate" data behind these reports.
EOF

    # Generate mail subject
    if [ -f $run_step_artifacts/exe.regressions ]; then
	IFS=, read metric bmk symbol short_regression regression \
	   < <(head -n1 $run_step_artifacts/exe.regressions)
    elif [ -f $run_step_artifacts/symbol.regressions ]; then
	IFS=, read metric bmk symbol short_regression regression \
	   < <(head -n1 $run_step_artifacts/symbol.regressions)
    else
	# Exit with no regressions
	return 0
    fi

    cat > $run_step_artifacts/mail-subject.txt <<EOF
[TCWG CI] $short_regression after \$COMMIT_COMPONENT: \$COMMIT_SUBJECT
EOF
    cat $run_step_artifacts/jira-body.txt \
	| sed -e "s/^/# /" > $run_step_artifacts/results.regressions
    echo "# $short_regression" >> $run_step_artifacts/results.regressions
    )
}

compare_results_vect ()
{
    (
    set -euf -o pipefail
    echo "bmk,symbol,result" > $run_step_artifacts/results-compare.csv

    while IFS=, read -a arr; do
        bmk=${arr[0]}
        # hack to trim padding
        symbol=$(echo ${arr[1]} | xargs)
        base_num_vect_loops=${arr[3]}
        target_num_vect_loops=${arr[4]}
        if (( base_num_vect_loops > target_num_vect_loops )); then
	    echo "$bmk, $symbol, $base_num_vect_loops, $target_num_vect_loops" \
		>> $run_step_artifacts/results-compare.csv
        fi
    done < <(tail -n +2 $run_step_artifacts/results.csv)
    )
}

compare_results ()
{
    (
    set -euf -o pipefail

    local metric=$1
    local ref_results_id="$2"
    local new_results_id="$3"
    local cmp_options="$4"

    local results_ref results_new
    results_ref=$(cat $ref_results_id)
    results_new=$(cat $new_results_id)

    case "${rr[target]}" in
        "arm_eabi")
            cmp_options="$cmp_options --has_perf_logs no"
            ;;
    esac

    $scripts/tcwg-benchmark-results.sh \
        --results_ref $results_ref ++results $results_new \
        --top_artifacts "$run_step_artifacts" --verbose $verbose \
        --metric "$metric" $cmp_options \
        > $run_step_artifacts/results.log 2>&1

    case $metric in
	"perf")
	    compare_results_perf
	    ;;
	"vect")
	    compare_results_vect
	    ;;
	*)
	    echo "Invalid metric: $metric";
	    exit 1
	    ;;
    esac
    )
}

# Exit with code 0 if no new regressions between results_id-1 and -2 compared to
# regression between results_id-1 and -2 in base-artifacts/.
no_regression_vs_p ()
{
    (
    set -euf -o pipefail

    local ref_artifacts=$1
    local new_artifacts=$2

    # Check for build and correctness regressions.
    no_build_regression_p "$@"

    # Generate ref-results-compare.csv.  The value of "1" means that the result
    # in the 2nd run is no worse than the result in the 1st run (as expected).
    # The value of "100" means that the result in the 2nd run is worse than
    # the result in the 1st run (unexpected).
    # Note that we can grab previously-generated ref-results-compares.csv from
    # base-artifacts/, but it could have been generated with an older version
    # of scripts, so it's safer and more resilient to re-generate it from original
    # perf data.
    if [ ! -f "$ref_artifacts/results_id-1" ] || [ ! -f "$ref_artifacts/results_id-2" ]; then
	return 0
    fi
    # <Workaround> missing reference results, which we have listed in
    # tcwg-benchmark-results.broken-list.  Once all entries referencing missing
    # results are discarded, we'll remove this workaround.
    # Otherwise compare_results will fail while fetching baseline results,
    # and we'll consider this failure as a regression.
    if cat "$scripts/tcwg-benchmark-results.broken-list" \
	    | grep -q "^$(cat $ref_artifacts/results_id-1)\$\|^$(cat $ref_artifacts/results_id-2)\$"; then
	return 0
    fi
    # </Workaround>
    compare_results "${rr[metric]}" "$ref_artifacts/results_id-1" "$ref_artifacts/results_id-2" \
		    "--num_dsos 1 --num_symbols 0"

    while IFS= read -r -d '' i
    do
        mv $i "$(dirname $i)"/ref-"$(basename $i)"
    done < <(find $run_step_artifacts/ -type f -name "results*" -print0)

    # Similarly, generate new-results-compare.csv.
    if [ ! -f "$new_artifacts/results_id-1" ] || [ ! -f "$new_artifacts/results_id-2" ]; then
	return 1
    fi
    compare_results "${rr[metric]}" "$new_artifacts/results_id-1" "$new_artifacts/results_id-2" \
		    "--num_dsos 1 --num_symbols 0"
    while IFS= read -r -d '' i
    do
        mv $i "$(dirname $i)"/new-"$(basename $i)"
    done < <(find $run_step_artifacts/ -type f -name "results*" -print0)

    # Now compare the two reports.
    # If "ref" has value of "100" (bad state), and "new" has value of "100"
    # (also bad state), then we get no change, no regression, and final value
    # of 100% * 100/100 == 100.
    #
    # If "ref" has value of "1" (good state), and "new" has value of "1"
    # (also good state), then we get no change, no regression, and final value
    # of 100% * 1/1 == 100.
    #
    # If "ref" has value of "100" (bad state), and "new" has value of "1"
    # (good state), then we get a progression, and final value
    # of 100% * 1/100 == 1.
    #
    # If "ref" has value of "1" (good state), and "new" has value of "100"
    # (bad state), then we get a regression, and final value
    # of 100% * 100/1 == 10000.  We detect this below by comparing vs "5000".
    $scripts/../bmk-scripts/csvs2table.py -p 0 --relative $run_step_artifacts/ref-results-compare.csv $run_step_artifacts/new-results-compare.csv > $run_step_artifacts/results-compare.csv

    local -a arr
    local bmk symbol result status prev_bmk
    local -a bisect_bmks

    # Read result lines from <(tail -n +2 ...) below.
    # "-n +2" is to skip the header line.  Set $status to "1" if there is
    # a regression.
    status=0
    prev_bmk=""
    # Delete results.regressions generated by compare_results() calls above.
    rm -f $run_step_artifacts/results.regressions
    while IFS=, read -a arr; do
	bmk=${arr[0]}
	symbol=${arr[1]}
	result=${arr[2]}
	if ! [ "$result" -le "5000" ]; then
	    echo "# $bmk,$symbol regressed" >> $run_step_artifacts/results.regressions
	    status=1
	    if [ x"$bmk" != x"$prev_bmk" ]; then
		bisect_bmks+=("++benchmarks" "$bmk")
		prev_bmk="$bmk"
	    fi
	fi
    done < <(tail -n +2 $run_step_artifacts/results-compare.csv)
    echo "extra_build_params=${bisect_bmks[*]}" > $run_step_artifacts/extra-bisect-params
    return $status
    )
}

# Exit with code 0 if no regression compared to base-artifacts/.
# Inspect build results ./results and performance results in ./results_id.
no_regression_to_base_p ()
{
    (
    set -euf -o pipefail

    no_build_regression_p "$@"

    local ref_artifacts=$1
    local new_artifacts=$2

    if ! [ -f "$ref_artifacts/results_id" ]; then
	return 0
    fi
    # <Workaround> missing reference results, which we have listed in
    # tcwg-benchmark-results.broken-list.  Once all entries referencing missing
    # results are discarded, we'll remove this workaround.
    # Otherwise compare_results will fail while fetching baseline results,
    # and we'll consider this failure as a regression.
    if cat "$scripts/tcwg-benchmark-results.broken-list" \
	    | grep -q "^$(cat $ref_artifacts/results_id)\$"; then
	return 0
    fi
    # </Workaround>
    if ! [ -f "$new_artifacts/results_id" ]; then
	return 1
    fi

    # Make sure there is no stray results.regression file, which we use
    # as failure marker.
    # We can, potentially, call ${rr[no_regression_p]} several times in
    # a row during update_baseline() step, but we should stop at the first
    # regression.  Therefore, we should never see results.regressions exist.
    assert ! [ -f $run_step_artifacts/results.regressions ]

    local compare_opts=""
    case "${cflags[0]}" in
	*"_LTO"*) compare_opts="--num_symbols 0 --entry_threshold 10" ;;
    esac
    compare_results "${rr[metric]}" "$ref_artifacts/results_id" "$new_artifacts/results_id" "$compare_opts"
    if [ -f $run_step_artifacts/results.regressions ]; then
	return 1
    fi
    return 0
    )
}

# Implement rr[breakup_updated_components] hook.
tcwg_bmk_breakup_updated_components ()
{
    (
    set -euf -o pipefail

    # Compiler changes tend to cause the most regressions.
    # Breakup updated components into compiler and the rest of components
    # to reduce the number of builds.
    local cc
    case "${rr[toolchain]}" in
	llvm) cc="llvm" ;;
	gnu|gnu_eabi) cc="gcc" ;;
	*) assert false ;;
    esac

    if print_updated_components "\n" | grep -q "^$cc\$"; then
	echo "$cc"
	print_updated_components "\n" | grep -v "^$cc\$" | tr '\n' ' ' | sed -e "s/ \$//g"
	echo
    else
	print_updated_components "\n"
    fi
    )
}
rr[breakup_updated_components]=tcwg_bmk_breakup_updated_components

run_step stop_on_fail -10 reset_artifacts
run_step stop_on_fail x prepare_abe
run_step skip_on_fail -9 build_abe binutils
run_step skip_on_fail -8 build_abe stage1 -- "${gcc_override_configure[@]}"
run_step skip_on_fail x clean_sysroot
case "${rr[components]}" in
    *glibc*)
	run_step skip_on_fail -7 build_abe linux
	run_step skip_on_fail -6 build_abe glibc
	;;
    *newlib*)
	run_step skip_on_fail -6 build_abe newlib
	;;
esac

patch_branch=""
if [ x"${rr[metric]}" = x"vect" ]; then
    patch_branch="--patch linaro-local/vect-metric/master"
fi

run_step skip_on_fail -5 build_abe stage2 -- $patch_branch "${gcc_override_configure[@]}"

case "${rr[toolchain]}" in
    llvm) run_step skip_on_fail -3 build_llvm true ;;
esac
case "${#cflags[@]}" in
    2)
	# Don't bisect benchmark build/run failures in *-vs-* configurations.
	# Bisections happen only for regressions with build scores >=0,
	# which will happen if benchmark "${cflags[1]}" succeeds.
	run_step skip_on_fail -1 benchmark -- "${cflags[0]}" ${rr[top_artifacts]}/results_id-1
	run_step skip_on_fail 0 benchmark -- "${cflags[1]}" ${rr[top_artifacts]}/results_id-2
	# Set final "build" score to "1" for compatibility with older results
	run_step skip_on_fail 1 true
	rr[no_regression_p]=no_regression_vs_p
	run_step reset_on_fail x check_regression
	;;
    1)
	run_step skip_on_fail 1 benchmark -- "${cflags[0]}" ${rr[top_artifacts]}/results_id
	rr[no_regression_p]=no_regression_to_base_p
	run_step reset_on_fail x check_regression
	;;
esac
run_step stop_on_fail x update_baseline
run_step stop_on_fail x push_baseline

trap "" EXIT