Diffstat (limited to 'tcwg_bmk-build.sh')
-rwxr-xr-x  tcwg_bmk-build.sh  993
1 file changed, 302 insertions, 691 deletions
diff --git a/tcwg_bmk-build.sh b/tcwg_bmk-build.sh
index a5b60197..ce99257b 100755
--- a/tcwg_bmk-build.sh
+++ b/tcwg_bmk-build.sh
@@ -10,80 +10,66 @@ scripts=$(dirname $0)
convert_args_to_variables "$@"
-obligatory_variables rr[ci_project] rr[ci_config] ssh_host ssh_port
+obligatory_variables rr[ci_project] rr[ci_config]
declare -A rr
-# Execution mode: baseline, bisect, jenkins-full
-rr[mode]="${rr[mode]-baseline}"
+# All bmk configuration (hardware and benchmark lists) is implemented in this file
+# shellcheck source=tcwg_bmk-config.sh
+. $scripts/tcwg_bmk-config.sh
+
+# Execution mode: build or bisect
+rr[mode]="${rr[mode]-build}"
# Set custom revision for one of the projects, and use baseline revisions
# for all other projects.
rr[baseline_branch]="${rr[baseline_branch]-linaro-local/ci/${rr[ci_project]}/${rr[ci_config]}}"
-rr[update_baseline]="${rr[update_baseline]-update}"
+rr[update_baseline]="${rr[update_baseline]-ignore}"
rr[top_artifacts]="${rr[top_artifacts]-$(pwd)/artifacts}"
-# Set metric to perf by default.
-rr[metric]="${rr[metric]-perf}"
-
-# {toolchain_name}-{toolchain_ver}-{target}-{bmk}-{cflags}
-IFS=- read -a ci_config <<EOF
-${rr[ci_config]}
+# The ${ci_project}--${ci_config} string has the format:
+# 'tcwg_bmk-#{PROFILE_NAME}-#{BMK}--#{TOOLCHAIN}-#{TARGET}-{toolchain_ver}-{cflags}'
+IFS=- read -a ci_pjt_cfg <<EOF
+${rr[ci_project]}--${rr[ci_config]}
EOF
-rr[toolchain]=${rr[toolchain]-${ci_config[0]}}
-rr[target]=${rr[target]-${ci_config[2]}}
-benchmarks=("${benchmarks[@]-${ci_config[3]}}")
-if [ x"${benchmarks[*]}" = x"default" ]; then
- benchmarks=("${ci_config[3]}")
-fi
-if ! test_array cflags; then
- ci_config=("${ci_config[@]:4}")
- # In ${ci_config[@]} we now have "-"-separated entries (due to IFS=- above).
- # We restore "-" in compiler flags when doing flags="$flags-$flag" below.
- # We use "_" to separate compiler options, and it is translated to " -"
- # in benchmark().
- cflags=()
- while [ ${#ci_config[@]} -ge 1 ]; do
- flags=""
- while [ ${#ci_config[@]} -ge 1 ]; do
- flag="${ci_config[0]}"
- ci_config=("${ci_config[@]:1}")
- if [ x"$flag" = x"vs" ]; then
- break
- fi
- flags="$flags-$flag"
- done
- cflags+=("$flags")
- done
-fi
+
+rr[toolchain]=${rr[toolchain]-${ci_pjt_cfg[4]}}
+rr[target]=${rr[target]-${ci_pjt_cfg[5]}}
+
+cflags="${cflags--${ci_pjt_cfg[7]}}"
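As a worked example of the split above (using project and config names that also appear further down in this script): the double dash between ${ci_project} and ${ci_config} yields an empty field at index 3, which is why the toolchain, target and flags land at indices 4, 5 and 7.

    # Sample input; any tcwg_bmk project/config pair splits the same way.
    IFS=- read -r -a example \
        <<< "tcwg_bmk-code_speed-cpu2017rate--gnu-aarch64-master-O2"
    # [0]=tcwg_bmk [1]=code_speed [2]=cpu2017rate [3]=""   (empty field from "--")
    # [4]=gnu      [5]=aarch64    [6]=master      [7]=O2
    echo "toolchain=${example[4]} target=${example[5]} cflags=-${example[7]}"
    # prints: toolchain=gnu target=aarch64 cflags=-O2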
gcc_mode=""
-for i in $(seq 0 $(("${#cflags[@]}" - 1))); do
- cflags_mode=""
- if [[ x"${cflags[$i]}" == x*"VECT"* ]]; then
- rr[metric]="vect"
- fi
+case "${rr[target]}:$cflags" in
+ "arm:"*"mthumb"*) gcc_mode=thumb ;;
+ "arm:"*"marm"*) gcc_mode=arm ;;
+ "arm:-Os"*|"arm:-Oz"*)
+ gcc_mode=thumb
+ cflags="${cflags}_mthumb"
+ ;;
+ "arm:"*)
+ gcc_mode=arm
+ cflags="${cflags}_marm"
+ ;;
+ "arm_eabi:"*)
+ cflags="${cflags}_mthumb"
+ ;;
+esac
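A short trace of the case statement above, assuming an arm target whose flags carry no explicit -marm/-mthumb (the variable names below are local to this sketch):

    target=arm; flags="-O2"; mode=""
    case "$target:$flags" in
        "arm:"*"mthumb"*)      mode=thumb ;;
        "arm:"*"marm"*)        mode=arm ;;
        "arm:-Os"*|"arm:-Oz"*) mode=thumb; flags="${flags}_mthumb" ;;
        "arm:"*)               mode=arm;   flags="${flags}_marm" ;;
    esac
    echo "$mode $flags"    # prints: arm -O2_marm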
- case "${rr[target]}:${cflags[$i]}" in
- "arm:"*"mthumb"*) cflags_mode=thumb ;;
- "arm:"*"marm"*) cflags_mode=arm ;;
- "arm:-Os"*|"arm:-Oz"*)
- cflags_mode=thumb
- cflags[$i]="${cflags[$i]}_mthumb"
- ;;
- "arm:"*)
- cflags_mode=arm
- cflags[$i]="${cflags[$i]}_marm"
- ;;
- "arm_eabi:"*)
- cflags[$i]="${cflags[$i]}_mthumb"
- ;;
- esac
- if [ x"$gcc_mode" = x"" ]; then
- gcc_mode="$cflags_mode"
- elif [ x"$gcc_mode" != x"$cflags_mode" ]; then
- assert_with_msg "Unsupported arm/thumb configuration ${cflags[$(($i - 1))]} and ${cflags[$i]}" false
- fi
-done
+cflags="$(echo $cflags | sed -e "s/_/ -/g" -e "s/LTO/flto/g")"
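A quick illustration of that translation with a made-up flag string: every underscore becomes a " -" separator and the LTO token turns into -flto.

    flags="-O3_LTO_marm"    # illustrative value only
    flags="$(echo $flags | sed -e "s/_/ -/g" -e "s/LTO/flto/g")"
    echo "$flags"           # prints: -O3 -flto -marm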
+
+case "${rr[ci_project]}" in
+ *-*_size-*) rr[metric_id]="size" ;;
+ *-*_speed-*) rr[metric_id]="sample" ;;
+ *-*_vect-*) rr[metric_id]="num_vect_loops" ;;
+ *-*_sve-*) rr[metric_id]="num_sve_loops" ;;
+ *) assert_with_msg "Cannot determine metric from ${rr[ci_project]}" false ;;
+esac
+
+called_from_notify=${called_from_notify-false}
+
+hw=$(tcwg_bmk_hw)
+hw=${hw%_32} ; hw=${hw%_64}
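These are plain ${var%suffix} expansions; with a hypothetical tag such as tx1_32, only the board name remains.

    hw_example="tx1_32"    # hypothetical value of the form returned by tcwg_bmk_hw
    hw_example=${hw_example%_32} ; hw_example=${hw_example%_64}
    echo "$hw_example"     # prints: tx1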
+
+# -----------------------------------------------------------------------
gcc_override_configure=()
# Set default ARM/Thumb mode for AArch32 compiler. This ensures that libraries
@@ -103,15 +89,21 @@ gcc_override_configure+=("--set" "gcc_override_configure=--disable-libsanitizer"
# board type.
case "${rr[target]}" in
"arm_eabi") gcc_override_configure+=("--set" "gcc_override_configure=--disable-multilib"
- "--set" "gcc_override_configure=--with-cpu=cortex-m4"
"--set" "gcc_override_configure=--with-mode=thumb"
"--set" "gcc_override_configure=--with-float=hard"
) ;;
esac
+
+rr[cpu]=$(tcwg_bmk_cpu)
+if [ "${rr[cpu]}" != "" ]; then
+ gcc_override_configure+=("--set" "gcc_override_configure=--with-cpu=${rr[cpu]}")
+ cflags="$cflags -mcpu=${rr[cpu]}"
+fi
+
case "${rr[toolchain]}" in
llvm)
- rr[components]="binutils gcc linux glibc llvm" ;;
+ rr[components]="llvm" ;;
gnu)
rr[components]="binutils gcc linux glibc" ;;
gnu_eabi)
@@ -137,24 +129,19 @@ trap print_traceback EXIT
default_start_at=""
default_finish_at=""
case "${rr[mode]}" in
- "baseline")
- default_finish_at="update_baseline"
- ;;
"bisect")
single_updated_component="$(print_single_updated_component)"
case $single_updated_component in
binutils) default_start_at="build_abe-binutils" ;;
gcc) default_start_at="build_abe-stage1" ;;
linux|glibc) default_start_at="clean_sysroot" ;;
- llvm) default_start_at="build_llvm-true" ;;
+ llvm) default_start_at="build_bmk_llvm" ;;
newlib) default_start_at="build_abe-newlib" ;;
*) assert_with_msg \
- "Invalid single updated component \"$single_updated_component\"" false
- ;;
+ "Invalid single updated component \"$single_updated_component\"" false
+ ;;
esac
- default_finish_at="check_regression"
;;
- "jenkins-full") ;;
esac
if [ x"$start_at" = x"default" ]; then
start_at="$default_start_at"
@@ -163,15 +150,52 @@ if [ x"$finish_at" = x"default" ]; then
finish_at="$default_finish_at"
fi
+case "${rr[ci_project]}/${rr[ci_config]}" in
+ tcwg_bmk-code_speed-cpu2017rate/gnu-aarch64-master-O2|\
+ tcwg_bmk-code_speed-cpu2017rate/gnu-aarch64-master-O3|\
+ tcwg_bmk-code_speed-cpu2017rate/llvm-aarch64-master-O2|\
+ tcwg_bmk-code_speed-cpu2017rate/llvm-aarch64-master-O3)
+ rr[major]=3
+ rr[minor]=0
+ ;;
+ *)
+ rr[major]=2
+ rr[minor]=3
+ ;;
+esac
+
run_step_init "$start_at" "$finish_at" "${rr[top_artifacts]}" "$verbose"
+build_bmk_llvm ()
+{
+ (
+ set -euf -o pipefail
+
+ local projects="clang;lld;openmp"
+ case "${rr[target]}" in
+ aarch64)
+ # Flang is not supported for AArch32
+ projects="$projects;flang"
+ ;;
+ esac
+
+ build_llvm "$projects" "" "${rr[metric_id]}"
+
+ # Copy shared libraries to runtime sysroot dir
+ mkdir -p llvm-install/libc
+ rsync -a --del --include "*/" --include "*.so*" --exclude "*" \
+ --delete-excluded llvm-install/lib/ llvm-install/libc/lib/
+ )
+}
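The rsync filter chain in build_bmk_llvm is the usual "copy only files matching a pattern while keeping the directory tree" idiom: --include "*/" lets rsync descend into every directory, --include "*.so*" keeps shared objects, and the final --exclude "*" drops everything else. A throw-away sketch with made-up file names:

    src=$(mktemp -d); dst=$(mktemp -d)
    mkdir -p "$src/sub"
    touch "$src/libfoo.so.1" "$src/sub/libbar.so" "$src/notes.txt"
    rsync -a --del --include "*/" --include "*.so*" --exclude "*" \
        --delete-excluded "$src/" "$dst/"
    find "$dst" -type f    # lists only libfoo.so.1 and sub/libbar.so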
+
benchmark ()
{
+ obligatory_variables ssh_host ssh_port
+
(
set -euf -o pipefail
- local bmk_cflags="$2"
- local results_id_file="$3"
+ local bmk_flags="$2"
sanity_check_pwd
@@ -179,740 +203,327 @@ benchmark ()
rm -rf "$(pwd)"/bin
mkdir "$(pwd)"/bin
- local bmk_flags bmk_ldflags reboot run_profile
- bmk_flags="$(echo $bmk_cflags | sed -e "s/_/ -/g" -e "s/LTO/flto/g" \
- -e "s/VECT/fdump-tree-vect-details/g")"
- case "$bmk_cflags" in
- "-Os"*|"-Oz"*)
+ local reboot run_profile
+
+ local hw_tag
+ hw_tag=$(tcwg_bmk_hw)
+
+ case "${rr[ci_project]}" in
+ *_size*|*_vect*|*_sve*)
reboot=false
run_profile="parallel"
+ testmode="verify"
;;
*)
reboot=true
run_profile="serial"
+ testmode="benchmark"
;;
esac
- local bench_list bin cc gnu_host gnu_target sysroot toolchain
- gnu_host=$(print_gnu_target native)
- gnu_target=$(print_gnu_target ${rr[target]})
- sysroot="$(pwd)/abe/builds/destdir/$gnu_host/$gnu_target/libc"
+ local bench_list bin cc sysroot toolchain
case "${rr[toolchain]}" in
llvm)
- local llvm_target
- llvm_target=$(echo "$gnu_target" | sed -e "s/^arm-/armv7a-/")
- bmk_flags="$bmk_flags --target=$llvm_target --sysroot=$sysroot"
- bmk_ldflags="$bmk_flags"
- # Use LLD for LLVM configurations.
- # Also, BFD linker crashes for AArch32 LTO builds,
- # see https://projects.linaro.org/browse/LLVM-562 .
- case "$bmk_ldflags" in
- *"-fuse-ld="*) ;;
- *) bmk_ldflags="$bmk_ldflags -fuse-ld=lld" ;;
- esac
+ sysroot="$(pwd)/llvm-install/libc"
bin="$(pwd)/llvm-install/bin"
cc="$bin/"
toolchain="llvm"
;;
gnu|gnu_eabi)
- bmk_ldflags="$bmk_flags"
+ local gnu_host gnu_target
+ gnu_host=$(print_gnu_target native)
+ gnu_target=$(print_gnu_target ${rr[target]})
+ sysroot="$(pwd)/abe/builds/destdir/$gnu_host/$gnu_target/libc"
bin="$(pwd)/abe/builds/destdir/$gnu_host/bin"
cc="$bin/$gnu_target-"
toolchain="gnu"
+ # Append -fdump-statistics-asmname to obtain compile time metrics.
+ bmk_flags="$bmk_flags -fdump-statistics-asmname -fdump-tree-vect-details"
;;
esac
- case "${rr[toolchain]}:${benchmarks[*]}" in
- llvm:spec2k6) bench_list="c_and_cxx" ;;
- gnu:spec2k6) bench_list="all" ;;
- llvm:spec2017) bench_list="spec2017_speed_nofortran" ;;
- gnu:spec2017) bench_list="spec2017_speed" ;;
- *) bench_list="${benchmarks[*]}" ;;
- esac
+
+ bench_list="$(tcwg_bmk_benchs)"
+
# shellcheck disable=SC2154
sysroot="ssh://$ssh_host:$ssh_port:$sysroot"
- local hw_tag
- case "${rr[ci_project]}:${rr[target]}" in
- *_apm_32*:*) hw_tag=apm_32 ;;
- *_apm_64*:*) hw_tag=apm_64 ;;
- *_apm*:arm*) hw_tag=apm_32 ;;
- *_apm*:aarch64) hw_tag=apm_64 ;;
- *_sq_32*:*) hw_tag=sq_32 ;;
- *_sq_64*:*) hw_tag=sq_64 ;;
- *_sq*:arm*) hw_tag=sq_32 ;;
- *_sq*:aarch64) hw_tag=sq_64 ;;
- *_tk1_32*:*) hw_tag=tk1_32 ;;
- *_tk1*:arm*) hw_tag=tk1_32 ;;
- *_tx1_32*:*) hw_tag=tx1_32 ;;
- *_tx1_64*:*) hw_tag=tx1_64 ;;
- *_tx1*:arm*) hw_tag=tx1_32 ;;
- *_tx1*:aarch64) hw_tag=tx1_64 ;;
- *_stm32*:arm*) hw_tag=stm32 ;;
- *) echo "ERROR: Unknown hw_tag for ${rr[ci_project]}:${rr[target]}"; exit 1 ;;
- esac
-
local hw image_arch toolchain_proto
+
toolchain_proto=ssh
case "$hw_tag" in
- apm_32) hw=apm; image_arch=armhf ;;
- apm_64) hw=apm; image_arch=arm64 ;;
- sq_32) hw=sq; image_arch=armhf ;;
- sq_64) hw=sq; image_arch=arm64 ;;
- tk1_32) hw=tk1; image_arch=armhf ;;
- tx1_32) hw=tx1; image_arch=armhf ;;
- tx1_64) hw=tx1; image_arch=arm64 ;;
stm32)
hw=stm32; image_arch=amd64
# When running benchmarks on stm32, we prefer to rsync the
# toolchain to the board's host machine -- dev-02.tcwglab.
toolchain_proto=rsync
;;
+ *_32) hw=${hw_tag/_32}; image_arch=armhf ;;
+ *_64) hw=${hw_tag/_64}; image_arch=arm64 ;;
*) echo "ERROR: Unknown hw_tag $hw_tag"; exit 1 ;;
esac
- local results_id="$hw_tag/${rr[ci_project]}/${rr[mode]}-${rr[ci_config]}/@build_num@"
+ # Create directory for tcwg-benchmark to upload results to.
+ # Note that files inside $results_dir will be owned by tcwg-benchmark.
+ local results_dir
+ results_dir="$(mktemp -d)"
+ chmod 0777 "$results_dir"
+    # Trigger the benchmarking job and capture its console output.
+    # Ignore the exit code of the trigger command; the various failure
+    # conditions are detected by examining the console log instead.
# shellcheck disable=SC2154
remote_exec "ci.linaro.org:2222::-l $USER@linaro.org" \
- build tcwg-benchmark -w \
+ build tcwg-benchmark -f -v \
-p bmk_hw=$hw \
-p bench_list="$bench_list" \
-p cflags="$bmk_flags" \
- -p ldflags="$bmk_ldflags" \
- -p testmode=benchmark \
- -p displaytag="${rr[ci_project]}/${rr[mode]}-${rr[ci_config]}" \
+ -p ldflags="$bmk_flags" \
+ -p testmode="$testmode" \
+ -p displaytag="${rr[ci_project]}/${rr[ci_config]}-${rr[mode]}" \
-p ignore_errors=true \
-p toolchain_url=$toolchain_proto://$ssh_host:$ssh_port:$cc \
-p toolchain_type=$toolchain \
-p sysroot="$sysroot" \
- -p results_id="$results_id" \
+ -p results_dest="$ssh_host:$ssh_port:$results_dir" \
-p reboot="$reboot" \
-p run_profile="$run_profile" \
-p image_arch="$image_arch" \
${scripts_branch+-p scripts_branch="$scripts_branch"} \
${bmk_branch+-p bmk_branch="$bmk_branch"} \
- | tee $run_step_artifacts/benchmark-start.log
+ | tee $run_step_artifacts/benchmark-build.log || true
local build_num
- build_num=$(cat $run_step_artifacts/benchmark-start.log \
- | sed -e "s/.*#\([0-9]\+\).*/\1/")
+ build_num=$(head -n1 $run_step_artifacts/benchmark-build.log \
+ | sed -e "s/Started.*#\([0-9]\+\).*/\1/")
assert_with_msg "Benchmark build number should not be 0!" \
- [ "$build_num" -gt "0" ]
+ [ "$build_num" -gt "0" ]
+
+ cat > $run_step_artifacts/benchmark_job.txt << EOF
+Url: https://ci.linaro.org/job/tcwg-benchmark/$build_num
+Name: $(curl -s "https://ci.linaro.org/job/tcwg-benchmark/$build_num/api/json?tree=displayName" \
+ | jq -r ".displayName")
+EOF
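For reference, a sketch of how the build number and job name are obtained; the "Started ..." line and the build number are made up, and the tree= query parameter is assumed to behave as on a stock Jenkins (it restricts the JSON reply to the named field).

    # Made-up trigger output; the real log comes from the remote_exec call above.
    echo "Started tcwg-benchmark #1234" > benchmark-build.log
    build_num=$(head -n1 benchmark-build.log | sed -e "s/Started.*#\([0-9]\+\).*/\1/")
    echo "$build_num"    # prints: 1234
    # Fetch only the displayName field of that build and strip the JSON quoting.
    curl -s "https://ci.linaro.org/job/tcwg-benchmark/$build_num/api/json?tree=displayName" \
        | jq -r ".displayName"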
local build_status
local build_ret
while true; do
- (remote_exec "ci.linaro.org:2222::-l $USER@linaro.org" \
- console tcwg-benchmark -n 1 -f $build_num || true) \
- | tee -a $run_step_artifacts/benchmark.log
-
# Ssh connection to ci.linaro.org occasionally drops. We need
# to check whether benchmarking has finished, and, if not, continue
- # to watch its output.
- build_status=$(tail -n 1 $run_step_artifacts/benchmark.log)
+ # waiting.
+ build_status=$(curl -s \
+ "https://ci.linaro.org/job/tcwg-benchmark/$build_num/api/json?tree=result" \
+ | jq -r ".result")
case "$build_status" in
- "Finished: SUCCESS")
+ "null")
+ # Continue waiting
+ true
+ ;;
+ "SUCCESS")
build_ret=0
break
;;
- "Finished: "*)
- echo "# Benchmarking infra is offline:" >> ${rr[top_artifacts]}/results
+ *)
+ echo "# Benchmarking infra is offline:" \
+ >> ${rr[top_artifacts]}/results
echo "-$EXTERNAL_FAIL" >> ${rr[top_artifacts]}/results
build_ret=1
break
;;
esac
- # Sleep a little to avoid flooding ci.linaro.org on transient ssh
- # failures.
- sleep 5
+ # Wait by following console output
+ (ssh -p2222 -l $USER@linaro.org ci.linaro.org \
+ console tcwg-benchmark $build_num -f || true) \
+ | tee $run_step_artifacts/benchmark-wait.log
done
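The polling loop relies on Jenkins reporting "result" as JSON null while the build is still running (printed as the string "null" by jq -r) and as a string such as SUCCESS or FAILURE once it has finished. A minimal standalone sketch of the same logic, with a hypothetical build URL and a plain sleep instead of following the console:

    job_url="https://ci.linaro.org/job/tcwg-benchmark/1234"    # hypothetical build
    while true; do
        result=$(curl -s "$job_url/api/json?tree=result" | jq -r ".result")
        case "$result" in
            "null")    sleep 60 ;;                               # still running
            "SUCCESS") echo "benchmarking passed"; break ;;
            *)         echo "benchmarking failed: $result"; break ;;
        esac
    done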
- echo "$results_id" | sed -e "s/@build_num@/$build_num/g" \
- > "$results_id_file"
+ rm -rf "${rr[top_artifacts]}/annex"
+ mkdir "${rr[top_artifacts]}/annex"
+ ln -s "$results_dir" "${rr[top_artifacts]}/annex/bmk-data"
+
return $build_ret
)
}
-# Compare results obtained from perf data between $1 and $2
-# and generate results-compare.csv
-compare_results_perf ()
+# Exit with code 0 if no regression compared to base-artifacts/.
+no_regression_p ()
{
(
set -euf -o pipefail
- local exe_threshold symbol_threshold
- case "${cflags[0]}" in
- "-Os"*|"-Oz"*)
- # We use 1% tolerance for binary size
- # and 10% tolerance for symbol size.
- exe_threshold=1
- symbol_threshold=10
- ;;
- *)
- # We use 3% tolerance for binary speed
- # and 15% tolerance for symbol speed.
- exe_threshold=3
- symbol_threshold=15
- # Reduce thresholds when bisecting to avoid considering borderline
- # regressions as spurious. This should break cycles of build and
- # bisect jobs triggering each other on borderline regressions.
- if [ x"${rr[mode]}" = x"bisect" ]; then
- exe_threshold=2
- symbol_threshold=10
- fi
- ;;
- esac
-
- local -a arr
- local metric bmk symbol rtime rsize time1 time2 size1 size2
- local regression short_symbol short_regression
- local result prev_bmk
- echo "bmk,symbol,result" > $run_step_artifacts/results-compare.csv
- printf "extra_build_params=" > $run_step_artifacts/extra-bisect-params
-
- assert_with_msg "Found stale regression files" \
- [ x"$(find $run_step_artifacts/ -name "*.regression" | wc -l)" = x"0" ]
-
- local metric_id regressed_by
- case "${cflags[0]}" in
- "-Os"*|"-Oz"*)
- metric_id="size"
- regressed_by="grew in size by"
- ;;
- *)
- metric_id="time"
- regressed_by="slowed down by"
- ;;
- esac
-
- # Read result lines from <(tail -n +2 ...) below.
- # "-n +2" is to skip the header line.
- prev_bmk=""
- while IFS=, read -a arr; do
- bmk=${arr[0]}
- symbol=${arr[1]}
- rtime=${arr[2]}
- rsize=${arr[3]}
- time1=${arr[4]}
- time2=${arr[5]}
- size1=${arr[6]}
- size2=${arr[7]}
-
- case $metric_id in
- size) metric=$rsize ;;
- time) metric=$rtime ;;
- *) assert false ;;
- esac
-
- # Skip case where we have no info ("n/a")
- if [ "$metric" != "n/a" ]; then
- metric=$(($metric - 100))
- # Remove padding from the tail of $symbol (padding is added by
- # csvs2table.py for better formatting).
- short_symbol="$(echo "$symbol" | sed -e "s/ *\$//")"
-
- local bmk_exe
- case "$short_symbol" in
- "["*) bmk_exe=false ;;
- *"_base.default") bmk_exe=true ;;
- *) bmk_exe=false ;;
- esac
-
- local threshold
- if $bmk_exe; then
- threshold=$exe_threshold
- else
- threshold=$symbol_threshold
- fi
-
- if ! [ "$metric" -le "$threshold" ]; then
- result=100
-
- case $metric_id in
- size)
- short_regression="$regressed_by ${metric}%"
- regression="$short_regression from $size1 to $size2 bytes"
- ;;
- time)
- short_regression="$regressed_by ${metric}%"
- regression="$short_regression from $time1 to $time2 perf samples" ;;
- *) assert false ;;
- esac
- if $bmk_exe; then
- short_regression="$bmk $short_regression"
- regression="$bmk $regression"
- # Detect magic sample counts that indicate failure to build
- # and failure to run
- case "$time2" in
- 888888888)
- short_regression="$bmk failed to run correctly"
- regression="$short_regression"
- ;;
- 999999999)
- short_regression="$bmk failed to build"
- regression="$short_regression"
- ;;
- esac
- echo "$metric,$bmk,$symbol,$short_regression,$regression" >> $run_step_artifacts/exe.regressions
- else
- short_regression="$bmk:$short_symbol $short_regression"
- regression="$bmk:$short_symbol $regression"
- echo "$metric,$bmk,$symbol,$short_regression,$regression" >> $run_step_artifacts/$bmk.regression
- fi
- if [ x"$bmk" != x"$prev_bmk" ]; then
- printf "++benchmarks %s " $bmk >> $run_step_artifacts/extra-bisect-params
- prev_bmk="$bmk"
- fi
- else
- result=1
- fi
- echo "$bmk,$symbol,$result" >> $run_step_artifacts/results-compare.csv
- fi
- done < <(tail -n +2 $run_step_artifacts/results.csv)
- printf "\n" >> $run_step_artifacts/extra-bisect-params
-
- # Comparison is done. Below we generate regression report.
- cat > $run_step_artifacts/jira-body.txt <<EOF
-After \$COMMIT_COMPONENT \$COMMIT_LOG
-EOF
- if [ -f $run_step_artifacts/exe.regressions ]; then
- sort -gr -o $run_step_artifacts/exe.regressions \
- $run_step_artifacts/exe.regressions
-
- cat >> $run_step_artifacts/jira-body.txt <<EOF
-
-the following benchmarks $regressed_by more than ${exe_threshold}%:
-EOF
- local exe
- while IFS=, read metric exe symbol short_regression regression; do
- cat >> $run_step_artifacts/jira-body.txt <<EOF
-- $regression
-EOF
- if [ -f $run_step_artifacts/$exe.regression ]; then
- while IFS=, read metric bmk symbol short_regression regression; do
- cat >> $run_step_artifacts/jira-body.txt <<EOF
- - $regression
-EOF
- done < $run_step_artifacts/$exe.regression
- # Delete $bmk.regressions so that it doesn't show up
- # in symbol-regression loop below.
- rm $run_step_artifacts/$exe.regression
- fi
- done < $run_step_artifacts/exe.regressions
- fi
-
- find $run_step_artifacts/ -name "*.regression" -print0 | xargs -0 cat \
- | sort -gr -o $run_step_artifacts/symbol.regressions
- if [ x"$(cat $run_step_artifacts/symbol.regressions)" = x"" ]; then
- # Delete empty file
- rm $run_step_artifacts/symbol.regressions
- fi
+ # check score-based regression
+ no_build_regression_p "$@"
- if [ -f $run_step_artifacts/symbol.regressions ]; then
- cat >> $run_step_artifacts/jira-body.txt <<EOF
+ # At this stage, there's no score-based regression.
+ # We are now checking metric-based regression.
-the following hot functions $regressed_by more than ${symbol_threshold}% (but their benchmarks $regressed_by less than ${exe_threshold}%):
-EOF
- while IFS=, read metric bmk symbol short_regression regression; do
- cat >> $run_step_artifacts/jira-body.txt <<EOF
-- $regression
-EOF
- done < $run_step_artifacts/symbol.regressions
- fi
+ assert_with_msg "Benchmarking succeeded, but bmk-data is missing" \
+ [ -e $run_step_top_artifacts/annex/bmk-data ]
- cp $run_step_artifacts/jira-body.txt $run_step_artifacts/mail-body.txt
+    # Make sure there is no stray results.regressions file, which we use
+    # as a failure marker.
+ assert ! [ -f $run_step_artifacts/results.regressions ]
- local bmk_suite="" publish_save_temps=false
- case "${benchmarks[*]}" in
- coremark) bmk_suite="EEMBC CoreMark" ;;
- spec2k6|4*)
- bmk_suite="SPEC CPU2006"
- publish_save_temps=true
- ;;
- spec2017|5*|6*)
- bmk_suite="SPEC CPU2017"
- publish_save_temps=true
- ;;
+ local compare_opts=""
+ case "${rr[target]}:$cflags" in
+ "arm_eabi":*) compare_opts="--has_perf_logs no" ;;
+ *) compare_opts="" ;;
esac
- cat >> $run_step_artifacts/mail-body.txt <<EOF
-
-Below reproducer instructions can be used to re-build both "first_bad" and "last_good" cross-toolchains used in this bisection. Naturally, the scripts will fail when triggerring benchmarking jobs if you don't have access to Linaro TCWG CI.
-EOF
-
- # Copy save-temps tarballs to artifacts, so that they are accessible.
- # We can publish pre-processed source only for benchmarks derived from
- # open-source projects.
- # Note that we include save-temps artifacts for successful builds so that
- # "last_good" build has the artifacts.
- if $publish_save_temps; then
- mkdir -p $run_step_artifacts/top-artifacts
- local s_t
- while read s_t; do
- rsync -a "$s_t/" $run_step_artifacts/top-artifacts/save-temps/
- done < <(find results-1 -type d -name "save.*.temps")
+ if [ -f /usr/lib/linux-tools/install-armhf-perf-workaround.sh ]; then
+ # FIXME:
+ # In some cases perf report crashes when run from armhf container on
+ # ARMv8 machine.
+ # Install a workaround while we are investigating the cause.
+ sudo /usr/lib/linux-tools/install-armhf-perf-workaround.sh
fi
- if [ -d $run_step_artifacts/top-artifacts/save-temps/ ]; then
- cat >> $run_step_artifacts/mail-body.txt <<EOF
-
-For your convenience, we have uploaded tarballs with pre-processed source and assembly files at:
-- First_bad save-temps: \$FIRST_BAD_ARTIFACTS/save-temps/
-- Last_good save-temps: \$LAST_GOOD_ARTIFACTS/save-temps/
-- Baseline save-temps: \$BASELINE_ARTIFACTS/save-temps/
-EOF
- fi
+ local new_results="${rr[top_artifacts]}/annex/bmk-data"
+ local ref_results="base-artifacts/annex/bmk-data"
- local compiler="" libc="" linker="" version="" target="" bmk_flags="" hw=""
- case "${rr[toolchain]}" in
- gnu)
- compiler="GCC"
- libc="Glibc"
- linker="GNU Linker"
- ;;
- gnu_eabi)
- compiler="GCC"
- libc="Newlib"
- linker="GNU LD"
- ;;
- llvm)
- compiler="Clang"
- libc="Glibc"
- linker="LLVM Linker"
- ;;
- esac
- case "${rr[ci_config]}" in
- *-master-*) version="tip of trunk" ;;
- *-release-*) version="latest release branch" ;;
- esac
- target=$(print_gnu_target ${rr[target]})
- bmk_flags=$(echo "${cflags[0]}" | sed -e "s/_/ -/g" -e "s/LTO/flto/g" \
- -e "s/VECT/fdump-tree-vect-details/g")
- case "${rr[ci_project]}" in
- *_apm*) hw="APM Mustang 8x X-Gene1" ;;
- *_tk1*) hw="NVidia TK1 4x Cortex-A15" ;;
- *_tx1*) hw="NVidia TX1 4x Cortex-A57" ;;
- *_stm32*) hw="STMicroelectronics STM32L476RGTx 1x Cortex-M4" ;;
- esac
+ assert_with_msg "Benchmarking succeeded, but no annex/bmk-data results" \
+ [ -d "$new_results" ]
- cat >> $run_step_artifacts/mail-body.txt <<EOF
-
-Configuration:
-- Benchmark: $bmk_suite
-- Toolchain: $compiler + $libc + $linker
-- Version: all components were built from their $version
-- Target: $target
-- Compiler flags: $bmk_flags
-- Hardware: $hw
-
-This benchmarking CI is work-in-progress, and we welcome feedback and suggestions at linaro-toolchain@lists.linaro.org . In our improvement plans is to add support for SPEC CPU2017 benchmarks and provide "perf report/annotate" data behind these reports.
-EOF
-
- # Generate mail subject
- if [ -f $run_step_artifacts/exe.regressions ]; then
- IFS=, read metric bmk symbol short_regression regression \
- < <(head -n1 $run_step_artifacts/exe.regressions)
- elif [ -f $run_step_artifacts/symbol.regressions ]; then
- IFS=, read metric bmk symbol short_regression regression \
- < <(head -n1 $run_step_artifacts/symbol.regressions)
- else
- # Exit with no regressions
- return 0
+ if ! [ -d "$ref_results" ]; then
+        # base-artifacts has no reference results.  This can happen on an
+        # init build (update_baseline=init).  In that case we compare the
+        # results to themselves, just as an exercise.
+ ref_results="$new_results"
+ assert_with_msg "No reference results" \
+ [ "${rr[update_baseline]}" = "init" ]
fi
- cat > $run_step_artifacts/mail-subject.txt <<EOF
-[TCWG CI] $short_regression after \$COMMIT_COMPONENT: \$COMMIT_SUBJECT
-EOF
- cat $run_step_artifacts/jira-body.txt \
- | sed -e "s/^/# /" > $run_step_artifacts/results.regressions
- echo "# $short_regression" >> $run_step_artifacts/results.regressions
- )
-}
-
-compare_results_vect ()
-{
- (
- set -euf -o pipefail
- echo "bmk,symbol,result" > $run_step_artifacts/results-compare.csv
-
- while IFS=, read -a arr; do
- bmk=${arr[0]}
- # hack to trim padding
- symbol=$(echo ${arr[1]} | xargs)
- base_num_vect_loops=${arr[3]}
- target_num_vect_loops=${arr[4]}
- if (( base_num_vect_loops > target_num_vect_loops )); then
- echo "$bmk, $symbol, $base_num_vect_loops, $target_num_vect_loops" \
- >> $run_step_artifacts/results-compare.csv
- fi
- done < <(tail -n +2 $run_step_artifacts/results.csv)
- )
-}
-
-compare_results ()
-{
- (
- set -euf -o pipefail
-
- local metric=$1
- local ref_results_id="$2"
- local new_results_id="$3"
- local cmp_options="$4"
-
- local results_ref results_new
- results_ref=$(cat $ref_results_id)
- results_new=$(cat $new_results_id)
-
- case "${rr[target]}" in
- "arm_eabi")
- cmp_options="$cmp_options --has_perf_logs no"
- ;;
- esac
-
+ # Compare vs previous run
+ mkdir -p ${rr[top_artifacts]}/results-vs-prev
+ ln -s ../results-vs-prev $run_step_artifacts/results-vs-prev
$scripts/tcwg-benchmark-results.sh \
- --results_ref $results_ref ++results $results_new \
- --top_artifacts "$run_step_artifacts" --verbose $verbose \
- --metric "$metric" $cmp_options \
- > $run_step_artifacts/results.log 2>&1
-
- case $metric in
- "perf")
- compare_results_perf
- ;;
- "vect")
- compare_results_vect
- ;;
- *)
- echo "Invalid metric: $metric";
- exit 1
- ;;
- esac
- )
-}
-
-# Exit with code 0 if no new regressions between results_id-1 and -2 compared to
-# regression between results_id-1 and -2 in base-artifacts/.
-no_regression_vs_p ()
-{
- (
- set -euf -o pipefail
-
- local ref_artifacts=$1
- local new_artifacts=$2
-
- # Check for build and correctness regressions.
- no_build_regression_p "$@"
-
- # Generate ref-results-compare.csv. The value of "1" means that the result
- # in the 2nd run is no worse than the result in the 1st run (as expected).
- # The value of "100" means that the result in the 2nd run is worse than
- # the result in the 1st run (unexpected).
- # Note that we can grab previously-generated ref-results-compares.csv from
- # base-artifacts/, but it could have been generated with an older version
- # of scripts, so it's safer and more resilient to re-generate it from original
- # perf data.
- if [ ! -f "$ref_artifacts/results_id-1" ] || [ ! -f "$ref_artifacts/results_id-2" ]; then
- return 0
+ --results_ref "$ref_results" ++results "$new_results" \
+ --top_artifacts "${rr[top_artifacts]}/results-vs-prev" \
+ --verbose $verbose --hw_tag "$(tcwg_bmk_hw)" \
+ $compare_opts \
+ > ${rr[top_artifacts]}/results-vs-prev/tcwg-benchmark-results.log 2>&1 &
+
+ local res
+ res=0 && wait $! || res=$?
+ if [ $res != 0 ]; then
+ return $EXTERNAL_FAIL
fi
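The "res=0 && wait $! || res=$?" line above is the common idiom for collecting a background job's exit status without letting "set -e" abort the script: every branch of the && / || list succeeds, so errexit never fires. A minimal sketch:

    set -euf -o pipefail
    false &                         # a deliberately failing background job
    res=0 && wait $! || res=$?      # harvest its status without tripping set -e
    echo "background job exited with $res"    # prints: background job exited with 1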
- # <Workaround> missing reference results, which we have listed in
- # tcwg-benchmark-results.broken-list. Once all entries referencing missing
- # results are discarded, we'll remove this workaround.
- # Otherwise compare_results will fail while fetching baseline results,
- # and we'll consider this failure as a regression.
- if cat "$scripts/tcwg-benchmark-results.broken-list" \
- | grep -q "^$(cat $ref_artifacts/results_id-1)\$\|^$(cat $ref_artifacts/results_id-2)\$"; then
- return 0
- fi
- # </Workaround>
- compare_results "${rr[metric]}" "$ref_artifacts/results_id-1" "$ref_artifacts/results_id-2" \
- "--num_dsos 1 --num_symbols 0"
- while IFS= read -r -d '' i
- do
- mv $i "$(dirname $i)"/ref-"$(basename $i)"
- done < <(find $run_step_artifacts/ -type f -name "results*" -print0)
+ # Below call to output-bmk-results.py creates *.regression files.
+ assert_with_msg "Found stale regression files" \
+ [ x"$(find $run_step_artifacts/ -name "*.regression" | wc -l)" = x"0" ]
- # Similarly, generate new-results-compare.csv.
- if [ ! -f "$new_artifacts/results_id-1" ] || [ ! -f "$new_artifacts/results_id-2" ]; then
- return 1
- fi
- compare_results "${rr[metric]}" "$new_artifacts/results_id-1" "$new_artifacts/results_id-2" \
- "--num_dsos 1 --num_symbols 0"
- while IFS= read -r -d '' i
- do
- mv $i "$(dirname $i)"/new-"$(basename $i)"
- done < <(find $run_step_artifacts/ -type f -name "results*" -print0)
-
- # Now compare the two reports.
- # If "ref" has value of "100" (bad state), and "new" has value of "100"
- # (also bad state), then we get no change, no regression, and final value
- # of 100% * 100/100 == 100.
- #
- # If "ref" has value of "1" (good state), and "new" has value of "1"
- # (also good state), then we get no change, no regression, and final value
- # of 100% * 1/1 == 100.
- #
- # If "ref" has value of "100" (bad state), and "new" has value of "1"
- # (good state), then we get a progression, and final value
- # of 100% * 1/100 == 1.
- #
- # If "ref" has value of "1" (good state), and "new" has value of "100"
- # (bad state), then we get a regression, and final value
- # of 100% * 100/1 == 10000. We detect this below by comparing vs "5000".
- $scripts/../bmk-scripts/csvs2table.py -p 0 --relative $run_step_artifacts/ref-results-compare.csv $run_step_artifacts/new-results-compare.csv > $run_step_artifacts/results-compare.csv
-
- local -a arr
- local bmk symbol result status prev_bmk
- local -a bisect_bmks
-
- # Read result lines from <(tail -n +2 ...) below.
- # "-n +2" is to skip the header line. Set $status to "1" if there is
- # a regression.
- status=0
- prev_bmk=""
- # Delete results.regressions generated by compare_results() calls above.
- rm -f $run_step_artifacts/results.regressions
- while IFS=, read -a arr; do
- bmk=${arr[0]}
- symbol=${arr[1]}
- result=${arr[2]}
- if ! [ "$result" -le "5000" ]; then
- echo "# $bmk,$symbol regressed" >> $run_step_artifacts/results.regressions
- status=1
- if [ x"$bmk" != x"$prev_bmk" ]; then
- bisect_bmks+=("++benchmarks" "$bmk")
- prev_bmk="$bmk"
- fi
+    # Extract the 5 most recent compare-results CSV files from
+    # base-artifacts history and compute the standard deviation from them.
+ local -a csvs_paths
+ csvs_paths=("results-vs-prev/compare-results-internal.csv"
+ "$(basename $run_step_artifacts)/compare-results-vs-prev-internal.csv")
+
+ local -a history_csvs
+ local csv history_root=""
+ while read csv; do
+ if [ "$history_root" = "" ]; then
+ history_root="$csv"
+ continue
+ fi
+
+ history_csvs+=("$csv")
+ done < <(get_git_history -0 base-artifacts "${csvs_paths[@]}")
+
+ local csv tmpf
+ local -a compare_results_list=()
+ tmpf=$(mktemp)
+
+    # FIXME:
+    # To cope with differences across recent base-artifacts history:
+    # - remove 'Failed for column' messages from the csv files
+    # - skip empty csv files.
+ for csv in "${history_csvs[@]}"; do
+ grep -v 'Failed for column' "$csv" > "$tmpf" || true
+ cp "$tmpf" "$csv"
+ if [ -s "$csv" ]; then
+ compare_results_list+=("$csv")
fi
- done < <(tail -n +2 $run_step_artifacts/results-compare.csv)
- echo "extra_build_params=${bisect_bmks[*]}" > $run_step_artifacts/extra-bisect-params
- return $status
- )
-}
+ done
-# Exit with code 0 if no regression compared to base-artifacts/.
-# Inspect build results ./results and performance results in ./results_id.
-no_regression_to_base_p ()
-{
- (
- set -euf -o pipefail
+ if [ ${#compare_results_list[@]} != 0 ]; then
+ $scripts/../bmk-scripts/compute-variability.py \
+ --inputs "${compare_results_list[@]}" ${rr[top_artifacts]}/results-vs-prev/compare-results-internal.csv \
+ --weights linear --method avg \
+ --output ${rr[top_artifacts]}/results-vs-prev/bmk-specific-variability-avg.csv || true
- no_build_regression_p "$@"
+ $scripts/../bmk-scripts/compute-variability.py \
+ --inputs "${compare_results_list[@]}" ${rr[top_artifacts]}/results-vs-prev/compare-results-internal.csv \
+ --weights 2-peaks-linear --method max \
+ --output ${rr[top_artifacts]}/results-vs-prev/bmk-specific-variability-max.csv || true
+ fi
- local ref_artifacts=$1
- local new_artifacts=$2
+ rm -rf "$history_root" "$tmpf"
- if ! [ -f "$ref_artifacts/results_id" ]; then
- return 0
- fi
- # <Workaround> missing reference results, which we have listed in
- # tcwg-benchmark-results.broken-list. Once all entries referencing missing
- # results are discarded, we'll remove this workaround.
- # Otherwise compare_results will fail while fetching baseline results,
- # and we'll consider this failure as a regression.
- if cat "$scripts/tcwg-benchmark-results.broken-list" \
- | grep -q "^$(cat $ref_artifacts/results_id)\$"; then
- return 0
- fi
- # </Workaround>
- if ! [ -f "$new_artifacts/results_id" ]; then
- return 1
- fi
+ $scripts/../bmk-scripts/output-bmk-results.py \
+ --compare_results ${rr[top_artifacts]}/results-vs-prev/compare-results-internal.csv \
+ --variability_file ${rr[top_artifacts]}/results-vs-prev/bmk-specific-variability-avg.csv \
+ --variability_file_data "avg" \
+ --run_step_dir "$run_step_artifacts"/ \
+ --metric "${rr[metric_id]}" --mode "${rr[mode]}" \
+ --details quiet > $run_step_artifacts/output-bmk-results.log
- # Make sure there is no stray results.regression file, which we use
- # as failure marker.
- # We can, potentially, call ${rr[no_regression_p]} several times in
- # a row during update_baseline() step, but we should stop at the first
- # regression. Therefore, we should never see results.regressions exist.
- assert ! [ -f $run_step_artifacts/results.regressions ]
+    # Copy inputs used to build the mail / jira notifications to the notify dir.
+ for resfile in $run_step_artifacts/{exe,symbol}.{regression,improvement}; do
+ if [ -f $resfile ]; then
+ cp $resfile ${rr[top_artifacts]}/notify/
+ fi
+ done
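The {exe,symbol}.{regression,improvement} pattern above is plain brace expansion and names four candidate files; a quick illustration with a hypothetical prefix:

    echo artifacts/{exe,symbol}.{regression,improvement}
    # prints: artifacts/exe.regression artifacts/exe.improvement
    #         artifacts/symbol.regression artifacts/symbol.improvement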
- local compare_opts=""
- case "${cflags[0]}" in
- *"_LTO"*) compare_opts="--num_symbols 0 --entry_threshold 10" ;;
- esac
- compare_results "${rr[metric]}" "$ref_artifacts/results_id" "$new_artifacts/results_id" "$compare_opts"
+    # The return status relies on the presence of the results.regressions file.
if [ -f $run_step_artifacts/results.regressions ]; then
+ assert_with_msg "Found a regression while comparing the build against itself" \
+ [ "$ref_results" != "$new_results" ]
return 1
fi
return 0
)
}
-# Implement rr[breakup_updated_components] hook.
-tcwg_bmk_breakup_updated_components ()
-{
- (
- set -euf -o pipefail
-
- # Compiler changes tend to cause the most regressions.
- # Breakup updated components into compiler and the rest of components
- # to reduce the number of builds.
- local cc
- case "${rr[toolchain]}" in
- llvm) cc="llvm" ;;
- gnu|gnu_eabi) cc="gcc" ;;
- *) assert false ;;
- esac
-
- if print_updated_components "\n" | grep -q "^$cc\$"; then
- echo "$cc"
- print_updated_components "\n" | grep -v "^$cc\$" | tr '\n' ' ' | sed -e "s/ \$//g"
- echo
- else
- print_updated_components "\n"
- fi
- )
-}
-rr[breakup_updated_components]=tcwg_bmk_breakup_updated_components
-
-run_step stop_on_fail -10 reset_artifacts
-run_step stop_on_fail x prepare_abe
-run_step skip_on_fail -9 build_abe binutils
-run_step skip_on_fail -8 build_abe stage1 -- "${gcc_override_configure[@]}"
-run_step skip_on_fail x clean_sysroot
-case "${rr[components]}" in
- *glibc*)
- run_step skip_on_fail -7 build_abe linux
- run_step skip_on_fail -6 build_abe glibc
+# Compiler changes tend to cause the most regressions.
+# Break up the updated components into the compiler and the remaining
+# components to reduce the number of builds.
+case "${rr[toolchain]}" in
+ llvm)
+ rr[breakup_changed_components]="breakup_changed_components llvm"
;;
- *newlib*)
- run_step skip_on_fail -6 build_abe newlib
+ gnu|gnu_eabi)
+ rr[breakup_changed_components]="breakup_changed_components gcc"
;;
+ *) assert false ;;
esac
-patch_branch=""
-if [ x"${rr[metric]}" = x"vect" ]; then
- patch_branch="--patch linaro-local/vect-metric-branch"
-fi
-
-run_step skip_on_fail -5 build_abe stage2 -- $patch_branch "${gcc_override_configure[@]}"
-
+run_step stop_on_fail -10 reset_artifacts
case "${rr[toolchain]}" in
- llvm) run_step skip_on_fail -3 build_llvm true ;;
-esac
-case "${#cflags[@]}" in
- 2)
- # Don't bisect benchmark build/run failures in *-vs-* configurations.
- # Bisections happen only for regressions with build scores >=0,
- # which will happen if benchmark "${cflags[1]}" succeeds.
- run_step skip_on_fail -1 benchmark -- "${cflags[0]}" ${rr[top_artifacts]}/results_id-1
- run_step skip_on_fail 0 benchmark -- "${cflags[1]}" ${rr[top_artifacts]}/results_id-2
- # Set final "build" score to "1" for compatibility with older results
- run_step skip_on_fail 1 true
- rr[no_regression_p]=no_regression_vs_p
- run_step reset_on_fail x check_regression
+ gnu*)
+ run_step stop_on_fail x prepare_abe
+ run_step skip_on_fail -9 build_abe binutils
+ run_step skip_on_fail -8 build_abe stage1 -- \
+ "${gcc_override_configure[@]}"
+ run_step skip_on_fail x clean_sysroot
+ case "${rr[components]}" in
+ *glibc*)
+ run_step skip_on_fail -7 build_abe linux
+ run_step skip_on_fail -6 build_abe glibc
+ ;;
+ *newlib*)
+ run_step skip_on_fail -6 build_abe newlib
+ ;;
+ esac
+ run_step skip_on_fail -5 build_abe stage2 -- \
+ "${gcc_override_configure[@]}"
;;
- 1)
- # Bisect benchmark build/run failures in non-vs configurations.
- # Set score to "0" with "true".
- run_step skip_on_fail 0 true
- run_step skip_on_fail 1 benchmark -- "${cflags[0]}" ${rr[top_artifacts]}/results_id
- rr[no_regression_p]=no_regression_to_base_p
- run_step reset_on_fail x check_regression
+ llvm)
+
+ run_step skip_on_fail -3 build_bmk_llvm
;;
esac
-run_step stop_on_fail x update_baseline
-run_step stop_on_fail x push_baseline
+run_step skip_on_fail 1 benchmark -- "$cflags"
+run_step reset_on_fail x check_regression
trap "" EXIT