Diffstat (limited to 'tcwg_bmk-build.sh')
 -rwxr-xr-x tcwg_bmk-build.sh | 813
 1 file changed, 317 insertions(+), 496 deletions(-)
diff --git a/tcwg_bmk-build.sh b/tcwg_bmk-build.sh
index 87263a59..f1cf6c64 100755
--- a/tcwg_bmk-build.sh
+++ b/tcwg_bmk-build.sh
@@ -10,86 +10,66 @@ scripts=$(dirname $0)
convert_args_to_variables "$@"
-obligatory_variables rr[ci_project] rr[ci_config] ssh_host ssh_port
+obligatory_variables rr[ci_project] rr[ci_config]
+declare -A rr
-# Execution mode: baseline, bisect, jenkins-full
-# shellcheck disable=SC2154
-rr[mode]="${rr[mode]-baseline}"
+# All bmk configuration about hw and benchmarks is implemented in this file
+# shellcheck source=tcwg_bmk-config.sh
+. $scripts/tcwg_bmk-config.sh
+
+# Execution mode: build or bisect
+rr[mode]="${rr[mode]-build}"
# Set custom revision for one of the projects, and use baseline revisions
# for all other projects.
-# shellcheck disable=SC2154
rr[baseline_branch]="${rr[baseline_branch]-linaro-local/ci/${rr[ci_project]}/${rr[ci_config]}}"
-# shellcheck disable=SC2154
-rr[update_baseline]="${rr[update_baseline]-update}"
-# shellcheck disable=SC2154
+rr[update_baseline]="${rr[update_baseline]-ignore}"
rr[top_artifacts]="${rr[top_artifacts]-$(pwd)/artifacts}"
-# Set metric to perf by default.
-# shellcheck disable=SC2154
-rr[metric]="${rr[metric]-perf}"
-
-# {toolchain_name}-{toolchain_ver}-{target}-{bmk}-{cflags}
-IFS=- read -a ci_config <<EOF
-${rr[ci_config]}
+# The ${ci_project}--${ci_config} string has the format
+# 'tcwg_bmk-#{PROFILE_NAME}-#{BMK}--#{TOOLCHAIN}-#{TARGET}-{toolchain_ver}-{cflags}'
+# (see the worked example below).
+IFS=- read -a ci_pjt_cfg <<EOF
+${rr[ci_project]}--${rr[ci_config]}
EOF
-# shellcheck disable=SC2154
-rr[toolchain]=${rr[toolchain]-${ci_config[0]}}
-# shellcheck disable=SC2154
-rr[target]=${rr[target]-${ci_config[2]}}
-benchmarks=("${benchmarks[@]-${ci_config[3]}}")
-if [ x"${benchmarks[*]}" = x"default" ]; then
- benchmarks=("${ci_config[3]}")
-fi
-if ! test_array cflags; then
- ci_config=("${ci_config[@]:4}")
- # In ${ci_config[@]} we now have "-"-separated entries (due to IFS=- above).
- # We restore "-" in compiler flags when doing flags="$flags-$flag" below.
- # We use "_" to separate compiler options, and it is translated to " -"
- # in benchmark().
- cflags=()
- while [ ${#ci_config[@]} -ge 1 ]; do
- flags=""
- while [ ${#ci_config[@]} -ge 1 ]; do
- flag="${ci_config[0]}"
- ci_config=("${ci_config[@]:1}")
- if [ x"$flag" = x"vs" ]; then
- break
- fi
- flags="$flags-$flag"
- done
- cflags+=("$flags")
- done
-fi
+
+rr[toolchain]=${rr[toolchain]-${ci_pjt_cfg[4]}}
+rr[target]=${rr[target]-${ci_pjt_cfg[5]}}
+
+cflags="${cflags--${ci_pjt_cfg[7]}}"
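+# For illustration, with hypothetical values
+# ci_project=tcwg_bmk-code_speed-cpu2017rate and ci_config=gnu-aarch64-master-O2
+# the split above yields:
+#   ci_pjt_cfg=(tcwg_bmk code_speed cpu2017rate '' gnu aarch64 master O2)
+# so toolchain=gnu ([4]), target=aarch64 ([5]), and cflags defaults to "-O2"
+# ("-" prepended to [7]).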
gcc_mode=""
-for i in $(seq 0 $(("${#cflags[@]}" - 1))); do
- cflags_mode=""
- if [[ x"${cflags[$i]}" == x*"VECT"* ]]; then
- rr[metric]="vect"
- fi
+case "${rr[target]}:$cflags" in
+ "arm:"*"mthumb"*) gcc_mode=thumb ;;
+ "arm:"*"marm"*) gcc_mode=arm ;;
+ "arm:-Os"*|"arm:-Oz"*)
+ gcc_mode=thumb
+ cflags="${cflags}_mthumb"
+ ;;
+ "arm:"*)
+ gcc_mode=arm
+ cflags="${cflags}_marm"
+ ;;
+ "arm_eabi:"*)
+ cflags="${cflags}_mthumb"
+ ;;
+esac
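+# e.g. target=arm with cflags=-O2 selects gcc_mode=arm and appends "_marm",
+# which the translation below turns into " -marm".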
- case "${rr[target]}:${cflags[$i]}" in
- "arm:"*"mthumb"*) cflags_mode=thumb ;;
- "arm:"*"marm"*) cflags_mode=arm ;;
- "arm:-Os"*|"arm:-Oz"*)
- cflags_mode=thumb
- cflags[$i]="${cflags[$i]}_mthumb"
- ;;
- "arm:"*)
- cflags_mode=arm
- cflags[$i]="${cflags[$i]}_marm"
- ;;
- "arm_eabi:"*)
- cflags[$i]="${cflags[$i]}_mthumb"
- ;;
- esac
- if [ x"$gcc_mode" = x"" ]; then
- gcc_mode="$cflags_mode"
- elif [ x"$gcc_mode" != x"$cflags_mode" ]; then
- assert_with_msg "Unsupported arm/thumb configuration ${cflags[$(($i - 1))]} and ${cflags[$i]}" false
- fi
-done
+cflags="$(echo $cflags | sed -e "s/_/ -/g" -e "s/LTO/flto/g")"
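+# e.g. "-O2_LTO_mthumb" becomes "-O2 -flto -mthumb".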
+
+case "${rr[ci_project]}" in
+ *-*_size-*) rr[metric_id]="size" ;;
+ *-*_speed-*) rr[metric_id]="sample" ;;
+ *-*_vect-*) rr[metric_id]="num_vect_loops" ;;
+ *-*_sve-*) rr[metric_id]="num_sve_loops" ;;
+ *) assert_with_msg "Cannot determine metric from ${rr[ci_project]}" false ;;
+esac
+
+called_from_notify=${called_from_notify-false}
+
+hw=$(tcwg_bmk_hw)
+hw=${hw%_32} ; hw=${hw%_64}
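+# e.g. a "tx1_32" hw_tag becomes the plain board name "tx1".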
+
+# -----------------------------------------------------------------------
gcc_override_configure=()
# Set default ARM/Thumb mode for AArch32 compiler. This ensures that libraries
@@ -109,18 +89,23 @@ gcc_override_configure+=("--set" "gcc_override_configure=--disable-libsanitizer"
# board type.
case "${rr[target]}" in
"arm_eabi") gcc_override_configure+=("--set" "gcc_override_configure=--disable-multilib"
- "--set" "gcc_override_configure=--with-cpu=cortex-m4"
"--set" "gcc_override_configure=--with-mode=thumb"
"--set" "gcc_override_configure=--with-float=hard"
) ;;
esac
+
+rr[cpu]=$(tcwg_bmk_cpu)
+if [ "${rr[cpu]}" != "" ]; then
+ gcc_override_configure+=("--set" "gcc_override_configure=--with-cpu=${rr[cpu]}")
+ cflags="$cflags -mcpu=${rr[cpu]}"
+fi
+
case "${rr[toolchain]}" in
llvm)
- # shellcheck disable=SC2154
- rr[components]="binutils gcc glibc llvm" ;;
+ rr[components]="llvm" ;;
gnu)
- rr[components]="binutils gcc glibc" ;;
+ rr[components]="binutils gcc linux glibc" ;;
gnu_eabi)
rr[components]="binutils gcc newlib" ;;
*) assert_with_msg "Unknown toolchain \"${rr[toolchain]}\"" false ;;
@@ -144,24 +129,19 @@ trap print_traceback EXIT
default_start_at=""
default_finish_at=""
case "${rr[mode]}" in
- "baseline")
- default_finish_at="update_baseline"
- ;;
"bisect")
single_updated_component="$(print_single_updated_component)"
case $single_updated_component in
binutils) default_start_at="build_abe-binutils" ;;
gcc) default_start_at="build_abe-stage1" ;;
- glibc) default_start_at="clean_sysroot" ;;
- llvm) default_start_at="build_llvm-true" ;;
+ linux|glibc) default_start_at="clean_sysroot" ;;
+ llvm) default_start_at="build_bmk_llvm" ;;
newlib) default_start_at="build_abe-newlib" ;;
*) assert_with_msg \
- "Invalid single updated component \"$single_updated_component\"" false
- ;;
+ "Invalid single updated component \"$single_updated_component\"" false
+ ;;
esac
- default_finish_at="check_regression"
;;
- "jenkins-full") ;;
esac
if [ x"$start_at" = x"default" ]; then
start_at="$default_start_at"
@@ -170,33 +150,52 @@ if [ x"$finish_at" = x"default" ]; then
finish_at="$default_finish_at"
fi
+case "${rr[ci_project]}/${rr[ci_config]}" in
+ tcwg_bmk-code_speed-cpu2017rate/gnu-aarch64-master-O2|\
+ tcwg_bmk-code_speed-cpu2017rate/gnu-aarch64-master-O3|\
+ tcwg_bmk-code_speed-cpu2017rate/llvm-aarch64-master-O2|\
+ tcwg_bmk-code_speed-cpu2017rate/llvm-aarch64-master-O3)
+ rr[major]=3
+ rr[minor]=0
+ ;;
+ *)
+ rr[major]=2
+ rr[minor]=3
+ ;;
+esac
+
run_step_init "$start_at" "$finish_at" "${rr[top_artifacts]}" "$verbose"
-# If we bisect a regression between different major versions of Glibc,
-# then we might get a mixed sysroot with several versions of ld-M.N.so and
-# other binaries installed side-by-side. Such a sysroot will break
-# benchmarking, which requires a single ld-*.so binary to be present.
-# Forcefully delete sysroot before building C library.
-clean_sysroot ()
+build_bmk_llvm ()
{
(
set -euf -o pipefail
- local gnu_target sysroot
- gnu_target=$(print_gnu_target ${rr[target]})
- sysroot="$(pwd)/abe/builds/destdir/x86_64-pc-linux-gnu/$gnu_target/libc"
+ local projects="clang;lld;openmp"
+ case "${rr[target]}" in
+ aarch64)
+ # Flang is not supported for AArch32
+ projects="$projects;flang"
+ ;;
+ esac
+
+ build_llvm "$projects" "" "${rr[metric_id]}"
- rm -rf "$sysroot"
+ # Copy shared libraries to runtime sysroot dir
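+    # (Filter sketch: "*/" keeps the directory structure, "*.so*" keeps
+    # shared objects, everything else is excluded, and --delete-excluded
+    # prunes stale files left over from a previous build.)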
+ mkdir -p llvm-install/libc
+ rsync -a --del --include "*/" --include "*.so*" --exclude "*" \
+ --delete-excluded llvm-install/lib/ llvm-install/libc/lib/
)
}
benchmark ()
{
+ obligatory_variables ssh_host ssh_port
+
(
set -euf -o pipefail
- local bmk_cflags="$1"
- local results_id_file="$3"
+ local bmk_flags="$2"
sanity_check_pwd
@@ -204,511 +203,333 @@ benchmark ()
rm -rf "$(pwd)"/bin
mkdir "$(pwd)"/bin
- local bmk_flags bmk_ldflags reboot run_profile
- bmk_flags="$(echo $bmk_cflags | sed -e "s/_/ -/g" -e "s/LTO/flto/g")"
- bmk_flags="$(echo $bmk_cflags | sed -e "s/_/ -/g" -e "s/VECT/fmetric-vect/g")"
- case "$bmk_cflags" in
- "-Os"*|"-Oz"*)
+ local reboot run_profile
+
+ local hw_tag
+ hw_tag=$(tcwg_bmk_hw)
+
+ case "${rr[ci_project]}" in
+ *_size*|*_vect*|*_sve*)
reboot=false
run_profile="parallel"
+ testmode="verify"
;;
*)
reboot=true
run_profile="serial"
+ testmode="benchmark"
;;
esac
- local bench_list bin cc gnu_target sysroot toolchain
- gnu_target=$(print_gnu_target ${rr[target]})
- sysroot="$(pwd)/abe/builds/destdir/x86_64-pc-linux-gnu/$gnu_target/libc"
+ local bench_list bin cc sysroot toolchain
case "${rr[toolchain]}" in
llvm)
- local llvm_target
- llvm_target=$(echo "$gnu_target" | sed -e "s/^arm-/armv7a-/")
- bmk_flags="$bmk_flags --target=$llvm_target --sysroot=$sysroot"
- bmk_ldflags="$bmk_flags"
- # Use LLD for LLVM configurations.
- # Also, BFD linker crashes for AArch32 LTO builds,
- # see https://projects.linaro.org/browse/LLVM-562 .
- case "$bmk_ldflags" in
- *"-fuse-ld="*) ;;
- *) bmk_ldflags="$bmk_ldflags -fuse-ld=lld" ;;
- esac
+ sysroot="$(pwd)/llvm-install/libc"
bin="$(pwd)/llvm-install/bin"
cc="$bin/"
toolchain="llvm"
;;
gnu|gnu_eabi)
- bmk_ldflags="$bmk_flags"
- bin="$(pwd)/abe/builds/destdir/x86_64-pc-linux-gnu/bin"
+ local gnu_host gnu_target
+ gnu_host=$(print_gnu_target native)
+ gnu_target=$(print_gnu_target ${rr[target]})
+ sysroot="$(pwd)/abe/builds/destdir/$gnu_host/$gnu_target/libc"
+ bin="$(pwd)/abe/builds/destdir/$gnu_host/bin"
cc="$bin/$gnu_target-"
toolchain="gnu"
+        # Append -fdump-statistics-asmname and -fdump-tree-vect-details to
+        # obtain compile-time and vectorization metrics.
+        bmk_flags="$bmk_flags -fdump-statistics-asmname -fdump-tree-vect-details"
;;
esac
- case "${rr[toolchain]}:${benchmarks[@]}" in
- llvm:spec2k6) bench_list="c_and_cxx" ;;
- gnu:spec2k6) bench_list="all" ;;
- llvm:spec2017) bench_list="spec2017_speed_nofortran" ;;
- gnu:spec2017) bench_list="spec2017_speed" ;;
- *) bench_list="${benchmarks[*]}" ;;
- esac
+
+ bench_list="$(tcwg_bmk_benchs)"
+
# shellcheck disable=SC2154
sysroot="ssh://$ssh_host:$ssh_port:$sysroot"
- local hw_tag
- case "${rr[ci_project]}:${rr[target]}" in
- *_sq_32*:*) hw_tag=sq_32 ;;
- *_sq_64*:*) hw_tag=sq_64 ;;
- *_sq*:arm*) hw_tag=sq_32 ;;
- *_sq*:aarch64) hw_tag=sq_64 ;;
- *_tk1_32*:*) hw_tag=tk1_32 ;;
- *_tk1*:arm*) hw_tag=tk1_32 ;;
- *_tx1_32*:*) hw_tag=tx1_32 ;;
- *_tx1_64*:*) hw_tag=tx1_64 ;;
- *_tx1*:arm*) hw_tag=tx1_32 ;;
- *_tx1*:aarch64) hw_tag=tx1_64 ;;
- *_stm32*:arm*) hw_tag=stm32_STM32L476RGTx ;;
- *) echo "ERROR: Unknown hw_tag for ${rr[ci_project]}:${rr[target]}"; exit 1 ;;
- esac
+ local hw image_arch toolchain_proto
- local hw image_arch
+ toolchain_proto=ssh
case "$hw_tag" in
- sq_32) hw=sq; image_arch=armhf ;;
- sq_64) hw=sq; image_arch=arm64 ;;
- tk1_32) hw=tk1; image_arch=armhf ;;
- tx1_32) hw=tx1; image_arch=armhf ;;
- tx1_64) hw=tx1; image_arch=arm64 ;;
- stm32_STM32L476RGTx) hw=stm32; image_arch=armhf ;;
+ stm32)
+ hw=stm32; image_arch=amd64
+ # When running benchmarks on stm32, we prefer to rsync the
+ # toolchain to the board's host machine -- dev-02.tcwglab.
+ toolchain_proto=rsync
+ ;;
+ *_32) hw=${hw_tag/_32}; image_arch=armhf ;;
+ *_64) hw=${hw_tag/_64}; image_arch=arm64 ;;
*) echo "ERROR: Unknown hw_tag $hw_tag"; exit 1 ;;
esac
- local results_id="$hw_tag/${rr[ci_project]}/${rr[mode]}-${rr[ci_config]}/@build_num@"
-
- # When running benchmarks on stm32, we prefer to rsync the
- # toolchain towards tcwg-bmk-stm32-01.
- case "$hw" in
- stm32) toolchain_proto=rsync ;;
- *) toolchain_proto=ssh ;;
- esac
+ # Create directory for tcwg-benchmark to upload results to.
+ # Note that files inside $results_dir will be owned by tcwg-benchmark.
+ local results_dir
+ results_dir="$(mktemp -d)"
+ chmod 0777 "$results_dir"
+    # Trigger the benchmarking job and capture its console output.
+    # Ignore the exit code of the trigger command; failure conditions are
+    # detected by examining the console log instead.
# shellcheck disable=SC2154
remote_exec "ci.linaro.org:2222::-l $USER@linaro.org" \
- build tcwg-benchmark -w \
+ build tcwg-benchmark -f -v \
-p bmk_hw=$hw \
-p bench_list="$bench_list" \
-p cflags="$bmk_flags" \
- -p ldflags="$bmk_ldflags" \
- -p testmode=benchmark \
- -p displaytag="${rr[ci_project]}/${rr[mode]}-${rr[ci_config]}" \
+ -p ldflags="$bmk_flags" \
+ -p testmode="$testmode" \
+ -p displaytag="${rr[ci_project]}/${rr[ci_config]}-${rr[mode]}" \
-p ignore_errors=true \
-p toolchain_url=$toolchain_proto://$ssh_host:$ssh_port:$cc \
-p toolchain_type=$toolchain \
-p sysroot="$sysroot" \
- -p results_id="$results_id" \
+ -p results_dest="$ssh_host:$ssh_port:$results_dir" \
-p reboot="$reboot" \
-p run_profile="$run_profile" \
-p image_arch="$image_arch" \
${scripts_branch+-p scripts_branch="$scripts_branch"} \
${bmk_branch+-p bmk_branch="$bmk_branch"} \
- | tee $run_step_artifacts/benchmark-start.log
+ | tee $run_step_artifacts/benchmark-build.log || true
local build_num
- build_num=$(cat $run_step_artifacts/benchmark-start.log \
- | sed -e "s/.*#\([0-9]\+\).*/\1/")
+ build_num=$(head -n1 $run_step_artifacts/benchmark-build.log \
+ | sed -e "s/Started.*#\([0-9]\+\).*/\1/")
assert_with_msg "Benchmark build number should not be 0!" \
- [ "$build_num" -gt "0" ]
+ [ "$build_num" -gt "0" ]
+
+ cat > $run_step_artifacts/benchmark_job.txt << EOF
+Url: https://ci.linaro.org/job/tcwg-benchmark/$build_num
+Name: $(curl -s "https://ci.linaro.org/job/tcwg-benchmark/$build_num/api/json?tree=displayName" \
+ | jq -r ".displayName")
+EOF
local build_status
local build_ret
while true; do
- (remote_exec "ci.linaro.org:2222::-l $USER@linaro.org" \
- console tcwg-benchmark -n 1 -f $build_num || true) \
- | tee -a $run_step_artifacts/benchmark.log
-
# Ssh connection to ci.linaro.org occasionally drops. We need
# to check whether benchmarking has finished, and, if not, continue
- # to watch its output.
- build_status=$(tail -n 1 $run_step_artifacts/benchmark.log)
+ # waiting.
+ build_status=$(curl -s \
+ "https://ci.linaro.org/job/tcwg-benchmark/$build_num/api/json?tree=result" \
+ | jq -r ".result")
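+        # The Jenkins JSON API reports {"result":null} while the build is
+        # still running.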
case "$build_status" in
- "Finished: SUCCESS")
+ "null")
+ # Continue waiting
+ true
+ ;;
+ "SUCCESS")
build_ret=0
break
;;
- "Finished: "*)
- echo "# Benchmarking infra is offline:" >> ${rr[top_artifacts]}/results
+ *)
+ echo "# Benchmarking infra is offline:" \
+ >> ${rr[top_artifacts]}/results
echo "-$EXTERNAL_FAIL" >> ${rr[top_artifacts]}/results
build_ret=1
break
;;
esac
- # Sleep a little to avoid flooding ci.linaro.org on transient ssh
- # failures.
- sleep 5
+ # Wait by following console output
+ (ssh -p2222 -l $USER@linaro.org ci.linaro.org \
+ console tcwg-benchmark $build_num -f || true) \
+ | tee $run_step_artifacts/benchmark-wait.log
done
- echo "$results_id" | sed -e "s/@build_num@/$build_num/g" \
- > "$results_id_file"
- return $build_ret
- )
-}
+ rm -rf "${rr[top_artifacts]}/annex"
+ mkdir "${rr[top_artifacts]}/annex"
+ ln -s "$results_dir" "${rr[top_artifacts]}/annex/bmk-data"
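+    # Expose the uploaded data under top_artifacts via a symlink rather than
+    # copying the (potentially large) bmk-data directory.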
-# Compare results obtained from perf data between $1 and $2
-# and generate results-compare.csv
-compare_results_perf ()
-{
- (
- set -euf -o pipefail
-
- case "${cflags[0]}" in
- "-Os"*|"-Oz"*)
- # We use 1% tolerance for binary size
- # and 10% tolerance for symbol size.
- exe_threshold=101
- symbol_threshold=110
- ;;
- *)
- # We use 3% tolerance for binary speed
- # and 15% tolerance for symbol speed.
- exe_threshold=103
- symbol_threshold=115
- # Reduce thresholds when bisecting to avoid considering borderline
- # regressions as spurious. This should break cycles of build and
- # bisect jobs triggering each other on borderline regressions.
- if [ x"${rr[mode]}" = x"bisect" ]; then
- exe_threshold=102
- symbol_threshold=110
- fi
- ;;
- esac
-
- local -a arr
- local bmk symbol time size result prev_bmk
- echo "bmk,symbol,result" > $run_step_artifacts/results-compare.csv
- printf "extra_build_params=" > $run_step_artifacts/extra-bisect-params
-
- # Read result lines from <(tail -n +2 ...) below.
- # "-n +2" is to skip the header line.
- prev_bmk=""
- while IFS=, read -a arr; do
- bmk=${arr[0]}
- symbol=${arr[1]}
- time=${arr[2]}
- size=${arr[3]}
- case "${cflags[0]}" in
- "-Os"*|"-Oz"*) metric="$size" ;;
- *) metric="$time" ;;
- esac
-
- # Skip case where we have no info ("n/a")
- if [ "$metric" != "n/a" ]; then
- # Remove padding from the tail of $symbol (padding is added by
- # csvs2table.py for better formatting).
- local short_symbol="${symbol%%[ ]*}"
- case "$short_symbol" in
- "["*) threshold=$symbol_threshold ;;
- *"_base.default") threshold=$exe_threshold ;;
- *) threshold=$symbol_threshold ;;
- esac
- if ! [ "$metric" -le "$threshold" ]; then
- result=100
- echo "# $bmk,$symbol regressed by $metric" >> $run_step_artifacts/results.regressions
- if [ x"$bmk" != x"$prev_bmk" ]; then
- printf "++benchmarks %s " $bmk >> $run_step_artifacts/extra-bisect-params
- prev_bmk="$bmk"
- fi
- else
- result=1
- fi
- echo "$bmk,$symbol,$result" >> $run_step_artifacts/results-compare.csv
- fi
- done < <(tail -n +2 $run_step_artifacts/results.csv)
- printf "\n" >> $run_step_artifacts/extra-bisect-params
+ return $build_ret
)
}
-compare_results_vect ()
+# Exit with code 0 if no regression compared to base-artifacts/.
+no_regression_p ()
{
(
set -euf -o pipefail
- echo "bmk,symbol,result" > $run_step_artifacts/results-compare.csv
-
- while IFS=, read -a arr; do
- bmk=${arr[0]}
- # hack to trim padding
- symbol=$(echo ${arr[1]} | xargs)
- base_num_vect_loops=${arr[3]}
- target_num_vect_loops=${arr[4]}
- if (( base_num_vect_loops > target_num_vect_loops )); then
- echo "$bmk, $symbol, $base_num_vect_loops, $target_num_vect_loops" \
- >> $run_step_artifacts/results-compare.csv
- fi
- done < <(tail -n +2 $run_step_artifacts/results.csv)
- )
-}
-compare_results ()
-{
- (
- set -euf -o pipefail
+    # Check for score-based regressions first.
+ no_build_regression_p "$@"
- local metric=$1
- local ref_results_id="$2"
- local new_results_id="$3"
- local cmp_options="$4"
+ # At this stage, there's no score-based regression.
+ # We are now checking metric-based regression.
- local results_ref results_new
- results_ref=$(cat $ref_results_id)
- results_new=$(cat $new_results_id)
+ assert_with_msg "Benchmarking succeeded, but bmk-data is missing" \
+ [ -e $run_step_top_artifacts/annex/bmk-data ]
- case "${rr[target]}" in
- "arm_eabi")
- cmp_options="$cmp_options --has_perf_logs no"
- ;;
- esac
+    # Make sure there is no stray results.regressions file, which we use
+    # as a failure marker.
+    assert ! [ -f $run_step_artifacts/results.regressions ]
- $scripts/tcwg-benchmark-results.sh \
- --results_ref $results_ref ++results $results_new \
- --top_artifacts "$run_step_artifacts" --verbose $verbose \
- --metric "$metric" $cmp_options \
- > $run_step_artifacts/results.log 2>&1
-
- case $metric in
- "perf")
- compare_results_perf
- ;;
- "vect")
- compare_results_vect
- ;;
- *)
- echo "Invalid metric: $metric";
- exit 1
- ;;
+ local compare_opts=""
+ case "${rr[target]}:$cflags" in
+ "arm_eabi":*) compare_opts="--has_perf_logs no" ;;
+ *) compare_opts="" ;;
esac
- )
-}
-# Exit with code 0 if no new regressions between results_id-1 and -2 compared to
-# regression between results_id-1 and -2 in base-artifacts/.
-no_regression_vs_p ()
-{
- (
- set -euf -o pipefail
-
- local ref_artifacts=$1
- local new_artifacts=$2
-
- # Check for build and correctness regressions.
- no_build_regression_p "$@"
-
- # Generate ref-results-compare.csv. The value of "1" means that the result
- # in the 2nd run is no worse than the result in the 1st run (as expected).
- # The value of "100" means that the result in the 2nd run is worse than
- # the result in the 1st run (unexpected).
- # Note that we can grab previously-generated ref-results-compares.csv from
- # base-artifacts/, but it could have been generated with an older version
- # of scripts, so it's safer and more resilient to re-generate it from original
- # perf data.
- if [ ! -f "$ref_artifacts/results_id-1" ] || [ ! -f "$ref_artifacts/results_id-2" ]; then
- return 0
+ if [ -f /usr/lib/linux-tools/install-armhf-perf-workaround.sh ]; then
+ # FIXME:
+ # In some cases perf report crashes when run from armhf container on
+ # ARMv8 machine.
+ # Install a workaround while we are investigating the cause.
+ sudo /usr/lib/linux-tools/install-armhf-perf-workaround.sh
fi
- # <Workaround> missing reference results, which we have listed in
- # tcwg-benchmark-results.broken-list. Once all entries referencing missing
- # results are discarded, we'll remove this workaround.
- # Otherwise compare_results will fail while fetching baseline results,
- # and we'll consider this failure as a regression.
- if cat "$scripts/tcwg-benchmark-results.broken-list" \
- | grep -q "^$(cat $ref_artifacts/results_id-1)\$\|^$(cat $ref_artifacts/results_id-2)\$"; then
- return 0
- fi
- # </Workaround>
- compare_results "${rr[metric]}" "$ref_artifacts/results_id-1" "$ref_artifacts/results_id-2" \
- "--num_dsos 1 --num_symbols 0"
- while IFS= read -r -d '' i
- do
- mv $i "$(dirname $i)"/ref-"$(basename $i)"
- done < <(find $run_step_artifacts/ -type f -name "results*" -print0)
+ local new_results="${rr[top_artifacts]}/annex/bmk-data"
+ local ref_results="base-artifacts/annex/bmk-data"
+
+ assert_with_msg "Benchmarking succeeded, but no annex/bmk-data results" \
+ [ -d "$new_results" ]
+
+    # Fetch the baseline results if necessary.  This was already done by
+    # reset_artifacts, but it may have been invalidated if a history rewrite
+    # happened between reset_artifacts and this step -- which is the case
+    # when running from a Jenkins project.
+ git_annex_download base-artifacts annex
+
+ if ! [ -d "$ref_results" ]; then
+ # base-artifacts has no reference results.
+ # This can happen on init build (update_baseline=init).
+ # In such cases we compare results to themselves just as an exercise.
+ ref_results="$new_results"
+ assert_with_msg "No reference results" \
+ [ "${rr[update_baseline]}" = "init" ]
+ fi
- # Similarly, generate new-results-compare.csv.
- if [ ! -f "$new_artifacts/results_id-1" ] || [ ! -f "$new_artifacts/results_id-2" ]; then
- return 1
+ # Compare vs previous run
+ mkdir -p ${rr[top_artifacts]}/results-vs-prev
+ ln -s ../results-vs-prev $run_step_artifacts/results-vs-prev
+ $scripts/tcwg-benchmark-results.sh \
+ --results_ref "$ref_results" ++results "$new_results" \
+ --top_artifacts "${rr[top_artifacts]}/results-vs-prev" \
+ --verbose $verbose --hw_tag "$(tcwg_bmk_hw)" \
+ $compare_opts \
+ > ${rr[top_artifacts]}/results-vs-prev/tcwg-benchmark-results.log 2>&1 &
+
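+    # Run the comparison in the background and reap it with "wait" so that a
+    # non-zero exit status can be captured without tripping "set -e".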
+ local res
+ res=0 && wait $! || res=$?
+ if [ $res != 0 ]; then
+ return $EXTERNAL_FAIL
fi
- compare_results "${rr[metric]}" "$new_artifacts/results_id-1" "$new_artifacts/results_id-2" \
- "--num_dsos 1 --num_symbols 0"
- while IFS= read -r -d '' i
- do
- mv $i "$(dirname $i)"/new-"$(basename $i)"
- done < <(find $run_step_artifacts/ -type f -name "results*" -print0)
-
- # Now compare the two reports.
- # If "ref" has value of "100" (bad state), and "new" has value of "100"
- # (also bad state), then we get no change, no regression, and final value
- # of 100% * 100/100 == 100.
- #
- # If "ref" has value of "1" (good state), and "new" has value of "1"
- # (also good state), then we get no change, no regression, and final value
- # of 100% * 1/1 == 100.
- #
- # If "ref" has value of "100" (bad state), and "new" has value of "1"
- # (good state), then we get a progression, and final value
- # of 100% * 1/100 == 1.
- #
- # If "ref" has value of "1" (good state), and "new" has value of "100"
- # (bad state), then we get a regression, and final value
- # of 100% * 100/1 == 10000. We detect this below by comparing vs "5000".
- $scripts/../bmk-scripts/csvs2table.py -p 0 --relative $run_step_artifacts/ref-results-compare.csv $run_step_artifacts/new-results-compare.csv > $run_step_artifacts/results-compare.csv
-
- local -a arr
- local bmk symbol result status prev_bmk
- local -a bisect_bmks
-
- # Read result lines from <(tail -n +2 ...) below.
- # "-n +2" is to skip the header line. Set $status to "1" if there is
- # a regression.
- status=0
- prev_bmk=""
- # Delete results.regressions generated by compare_results() calls above.
- rm -f $run_step_artifacts/results.regressions
- while IFS=, read -a arr; do
- bmk=${arr[0]}
- symbol=${arr[1]}
- result=${arr[2]}
- if ! [ "$result" -le "5000" ]; then
- echo "# $bmk,$symbol regressed" >> $run_step_artifacts/results.regressions
- status=1
- if [ x"$bmk" != x"$prev_bmk" ]; then
- bisect_bmks+=("++benchmarks" "$bmk")
- prev_bmk="$bmk"
- fi
+
+    # The call to output-bmk-results.py below creates *.regression files;
+    # make sure none are left over from a previous run.
+    assert_with_msg "Found stale regression files" \
+        [ x"$(find $run_step_artifacts/ -name "*.regression" | wc -l)" = x"0" ]
+
+    # Extract the 5 most recent compare-results-vs-prev-internal.csv files
+    # from base-artifacts' history and compute the standard deviation from them.
+ local -a csvs_paths
+ csvs_paths=("results-vs-prev/compare-results-internal.csv"
+ "$(basename $run_step_artifacts)/compare-results-vs-prev-internal.csv")
+
+ local -a history_csvs
+ local csv history_root=""
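+    # The first line printed by get_git_history is the temporary checkout
+    # root (removed at the end); the remaining lines are the extracted files.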
+ while read csv; do
+ if [ "$history_root" = "" ]; then
+ history_root="$csv"
+ continue
+ fi
+
+ history_csvs+=("$csv")
+ done < <(get_git_history -0 base-artifacts "${csvs_paths[@]}")
+
+ local csv tmpf
+ local -a compare_results_list=()
+ tmpf=$(mktemp)
+
+    # FIXME:
+    # To deal with some differences along base-artifacts' recent history:
+    # - remove the 'Failed for column' message from the csv files
+    # - skip empty csv files.
+ for csv in "${history_csvs[@]}"; do
+ grep -v 'Failed for column' "$csv" > "$tmpf" || true
+ cp "$tmpf" "$csv"
+ if [ -s "$csv" ]; then
+ compare_results_list+=("$csv")
fi
- done < <(tail -n +2 $run_step_artifacts/results-compare.csv)
- echo "extra_build_params=${bisect_bmks[*]}" > $run_step_artifacts/extra-bisect-params
- return $status
- )
-}
+ done
-# Exit with code 0 if no regression compared to base-artifacts/.
-# Inspect build results ./results and performance results in ./results_id.
-no_regression_to_base_p ()
-{
- (
- set -euf -o pipefail
+ if [ ${#compare_results_list[@]} != 0 ]; then
+ $scripts/../bmk-scripts/compute-variability.py \
+ --inputs "${compare_results_list[@]}" ${rr[top_artifacts]}/results-vs-prev/compare-results-internal.csv \
+ --weights linear --method avg \
+ --output ${rr[top_artifacts]}/results-vs-prev/bmk-specific-variability-avg.csv || true
- no_build_regression_p "$@"
+ $scripts/../bmk-scripts/compute-variability.py \
+ --inputs "${compare_results_list[@]}" ${rr[top_artifacts]}/results-vs-prev/compare-results-internal.csv \
+ --weights 2-peaks-linear --method max \
+ --output ${rr[top_artifacts]}/results-vs-prev/bmk-specific-variability-max.csv || true
+ fi
- local ref_artifacts=$1
- local new_artifacts=$2
+ rm -rf "$history_root" "$tmpf"
- if ! [ -f "$ref_artifacts/results_id" ]; then
- return 0
- fi
- # <Workaround> missing reference results, which we have listed in
- # tcwg-benchmark-results.broken-list. Once all entries referencing missing
- # results are discarded, we'll remove this workaround.
- # Otherwise compare_results will fail while fetching baseline results,
- # and we'll consider this failure as a regression.
- if cat "$scripts/tcwg-benchmark-results.broken-list" \
- | grep -q "^$(cat $ref_artifacts/results_id)\$"; then
- return 0
- fi
- # </Workaround>
- if ! [ -f "$new_artifacts/results_id" ]; then
- return 1
- fi
+ $scripts/../bmk-scripts/output-bmk-results.py \
+ --compare_results ${rr[top_artifacts]}/results-vs-prev/compare-results-internal.csv \
+ --variability_file ${rr[top_artifacts]}/results-vs-prev/bmk-specific-variability-avg.csv \
+ --variability_file_data "avg" \
+ --run_step_dir "$run_step_artifacts"/ \
+ --metric "${rr[metric_id]}" --mode "${rr[mode]}" \
+ --details quiet > $run_step_artifacts/output-bmk-results.log
- # Make sure there is no stray results.regression file, which we use
- # as failure marker.
- assert ! [ -f $run_step_artifacts/results.regressions ]
+    # Copy inputs useful for building the mail / jira / ... notifications
+    # into the notify dir.
+ for resfile in $run_step_artifacts/{exe,symbol}.{regression,improvement}; do
+ if [ -f $resfile ]; then
+ cp $resfile ${rr[top_artifacts]}/notify/
+ fi
+ done
- local compare_opts=""
- case "${cflags[0]}" in
- *"_LTO"*) compare_opts="--num_symbols 0 --entry_threshold 10" ;;
- esac
- compare_results "${rr[metric]}" "$ref_artifacts/results_id" "$new_artifacts/results_id" "$compare_opts"
+    # The return status relies on the presence of the results.regressions file.
if [ -f $run_step_artifacts/results.regressions ]; then
+ assert_with_msg "Found a regression while comparing the build against itself" \
+ [ "$ref_results" != "$new_results" ]
return 1
fi
return 0
)
}
-# Implement rr[breakup_updated_components] hook.
-tcwg_bmk_breakup_updated_components ()
-{
- (
- set -euf -o pipefail
-
- # Compiler changes tend to cause the most regressions.
- # Breakup updated components into compiler and the rest of components
- # to reduce the number of builds.
- local cc
- case "${rr[toolchain]}" in
- llvm) cc="llvm" ;;
- gnu|gnu_eabi) cc="gcc" ;;
- *) assert false ;;
- esac
-
- if print_updated_components "\n" | grep -q "^$cc\$"; then
- echo "$cc"
- print_updated_components "\n" | grep -v "^$cc\$" | tr '\n' ' ' | sed -e "s/ \$//g"
- echo
- else
- print_updated_components "\n"
- fi
- )
-}
-# shellcheck disable=SC2154
-rr[breakup_updated_components]=tcwg_bmk_breakup_updated_components
-
-run_step stop_on_fail -10 reset_artifacts
-run_step stop_on_fail x prepare_abe
-run_step skip_on_fail -9 build_abe binutils
-run_step skip_on_fail -8 build_abe stage1 -- "${gcc_override_configure[@]}"
-run_step skip_on_fail x clean_sysroot
-case "${rr[components]}" in
- *glibc*)
- run_step skip_on_fail -7 build_abe linux
- run_step skip_on_fail -6 build_abe glibc
+# Compiler changes tend to cause the most regressions.
+# Break up the changed components into the compiler and the rest of the
+# components to reduce the number of builds.
+case "${rr[toolchain]}" in
+ llvm)
+ rr[breakup_changed_components]="breakup_changed_components llvm"
;;
- *newlib*)
- run_step skip_on_fail -6 build_abe newlib
+ gnu|gnu_eabi)
+ rr[breakup_changed_components]="breakup_changed_components gcc"
;;
+ *) assert false ;;
esac
-patch_branch=""
-if [ x"${rr[metric]}" = x"vect" ]; then
- patch_branch="--patch linaro-local/vect-metric-branch"
-fi
-
-run_step skip_on_fail -5 build_abe stage2 -- $patch_branch "${gcc_override_configure[@]}"
-
+run_step stop_on_fail -10 reset_artifacts
case "${rr[toolchain]}" in
- llvm) run_step skip_on_fail -3 build_llvm true ;;
-esac
-case "${#cflags[@]}" in
- 2)
- # Don't bisect benchmark build/run failures in *-vs-* configurations.
- # Bisections happen only for regressions with build scores >=0,
- # which will happen if benchmark "${cflags[1]}" succeeds.
- run_step skip_on_fail -1 benchmark "${cflags[0]}" -- ${rr[top_artifacts]}/results_id-1
- run_step skip_on_fail 0 benchmark "${cflags[1]}" -- ${rr[top_artifacts]}/results_id-2
- # Set final "build" score to "1" for compatibility with older results
- run_step skip_on_fail 1 true
- # shellcheck disable=SC2154
- rr[no_regression_p]=no_regression_vs_p
- run_step reset_on_fail x check_regression
+ gnu*)
+ run_step stop_on_fail x prepare_abe
+ run_step skip_on_fail -9 build_abe binutils
+ run_step skip_on_fail -8 build_abe stage1 -- \
+ "${gcc_override_configure[@]}"
+ run_step skip_on_fail x clean_sysroot
+ case "${rr[components]}" in
+ *glibc*)
+ run_step skip_on_fail -7 build_abe linux
+ run_step skip_on_fail -6 build_abe glibc
+ ;;
+ *newlib*)
+ run_step skip_on_fail -6 build_abe newlib
+ ;;
+ esac
+ run_step skip_on_fail -5 build_abe stage2 -- \
+ "${gcc_override_configure[@]}"
;;
- 1)
- # Bisect benchmark build/run failures in non-vs configurations.
- # Set score to "0" with "true".
- run_step skip_on_fail 0 true
- run_step skip_on_fail 1 benchmark "${cflags[0]}" -- ${rr[top_artifacts]}/results_id
- rr[no_regression_p]=no_regression_to_base_p
- run_step reset_on_fail x check_regression
+    llvm)
+        run_step skip_on_fail -3 build_bmk_llvm
;;
esac
-run_step stop_on_fail x update_baseline
-run_step stop_on_fail x push_baseline
+run_step skip_on_fail 1 benchmark -- "$cflags"
+run_step reset_on_fail x check_regression
trap "" EXIT