Diffstat (limited to 'tcwg_bmk-build.sh')
-rwxr-xr-x  tcwg_bmk-build.sh  993
1 file changed, 302 insertions(+), 691 deletions(-)
diff --git a/tcwg_bmk-build.sh b/tcwg_bmk-build.sh
index a5b60197..ce99257b 100755
--- a/tcwg_bmk-build.sh
+++ b/tcwg_bmk-build.sh
@@ -10,80 +10,66 @@ scripts=$(dirname $0)
 
 convert_args_to_variables "$@"
 
-obligatory_variables rr[ci_project] rr[ci_config] ssh_host ssh_port
+obligatory_variables rr[ci_project] rr[ci_config]
 
 declare -A rr
 
-# Execution mode: baseline, bisect, jenkins-full
-rr[mode]="${rr[mode]-baseline}"
+# All bmk config for hw and benchmarks is implemented in this file
+# shellcheck source=tcwg_bmk-config.sh
+. $scripts/tcwg_bmk-config.sh
+
+# Execution mode: build or bisect
+rr[mode]="${rr[mode]-build}"
 
 # Set custom revision for one of the projects, and use baseline revisions
 # for all other projects.
 rr[baseline_branch]="${rr[baseline_branch]-linaro-local/ci/${rr[ci_project]}/${rr[ci_config]}}"
-rr[update_baseline]="${rr[update_baseline]-update}"
+rr[update_baseline]="${rr[update_baseline]-ignore}"
 rr[top_artifacts]="${rr[top_artifacts]-$(pwd)/artifacts}"
 
-# Set metric to perf by default.
-rr[metric]="${rr[metric]-perf}"
-
-# {toolchain_name}-{toolchain_ver}-{target}-{bmk}-{cflags}
-IFS=- read -a ci_config <<EOF
-${rr[ci_config]}
+# The ${ci_project}--${ci_config} format is:
+# 'tcwg_bmk-#{PROFILE_NAME}-#{BMK}--#{TOOLCHAIN}-#{TARGET}-{toolchain_ver}-{cflags}'
+IFS=- read -a ci_pjt_cfg <<EOF
+${rr[ci_project]}--${rr[ci_config]}
 EOF
-rr[toolchain]=${rr[toolchain]-${ci_config[0]}}
-rr[target]=${rr[target]-${ci_config[2]}}
-benchmarks=("${benchmarks[@]-${ci_config[3]}}")
-if [ x"${benchmarks[*]}" = x"default" ]; then
-    benchmarks=("${ci_config[3]}")
-fi
-if ! test_array cflags; then
-    ci_config=("${ci_config[@]:4}")
-    # In ${ci_config[@]} we now have "-"-separated entries (due to IFS=- above).
-    # We restore "-" in compiler flags when doing flags="$flags-$flag" below.
-    # We use "_" to separate compiler options, and it is translated to " -"
-    # in benchmark().
-    cflags=()
-    while [ ${#ci_config[@]} -ge 1 ]; do
-        flags=""
-        while [ ${#ci_config[@]} -ge 1 ]; do
-            flag="${ci_config[0]}"
-            ci_config=("${ci_config[@]:1}")
-            if [ x"$flag" = x"vs" ]; then
-                break
-            fi
-            flags="$flags-$flag"
-        done
-        cflags+=("$flags")
-    done
-fi
+
+rr[toolchain]=${rr[toolchain]-${ci_pjt_cfg[4]}}
+rr[target]=${rr[target]-${ci_pjt_cfg[5]}}
+
+cflags="${cflags--${ci_pjt_cfg[7]}}"
 
 gcc_mode=""
-for i in $(seq 0 $(("${#cflags[@]}" - 1))); do
-    cflags_mode=""
-    if [[ x"${cflags[$i]}" == x*"VECT"* ]]; then
-        rr[metric]="vect"
-    fi
+case "${rr[target]}:$cflags" in
+    "arm:"*"mthumb"*) gcc_mode=thumb ;;
+    "arm:"*"marm"*) gcc_mode=arm ;;
+    "arm:-Os"*|"arm:-Oz"*)
+        gcc_mode=thumb
+        cflags="${cflags}_mthumb"
+        ;;
+    "arm:"*)
+        gcc_mode=arm
+        cflags="${cflags}_marm"
+        ;;
+    "arm_eabi:"*)
+        cflags="${cflags}_mthumb"
+        ;;
+esac
 
-    case "${rr[target]}:${cflags[$i]}" in
-        "arm:"*"mthumb"*) cflags_mode=thumb ;;
-        "arm:"*"marm"*) cflags_mode=arm ;;
-        "arm:-Os"*|"arm:-Oz"*)
-            cflags_mode=thumb
-            cflags[$i]="${cflags[$i]}_mthumb"
-            ;;
-        "arm:"*)
-            cflags_mode=arm
-            cflags[$i]="${cflags[$i]}_marm"
-            ;;
-        "arm_eabi:"*)
-            cflags[$i]="${cflags[$i]}_mthumb"
-            ;;
-    esac
-    if [ x"$gcc_mode" = x"" ]; then
-        gcc_mode="$cflags_mode"
-    elif [ x"$gcc_mode" != x"$cflags_mode" ]; then
-        assert_with_msg "Unsupported arm/thumb configuration ${cflags[$(($i - 1))]} and ${cflags[$i]}" false
-    fi
-done
+cflags="$(echo $cflags | sed -e "s/_/ -/g" -e "s/LTO/flto/g")"
+
+case "${rr[ci_project]}" in
+    *-*_size-*) rr[metric_id]="size" ;;
+    *-*_speed-*) rr[metric_id]="sample" ;;
+    *-*_vect-*) rr[metric_id]="num_vect_loops" ;;
+    *-*_sve-*) rr[metric_id]="num_sve_loops" ;;
+    *) assert_with_msg "Cannot determine metric from ${rr[ci_project]}" false ;;
+esac
+
+called_from_notify=${called_from_notify-false}
+
+hw=$(tcwg_bmk_hw)
+hw=${hw%_32} ; hw=${hw%_64}
+
+# -----------------------------------------------------------------------
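The new parsing splits "${ci_project}--${ci_config}" on "-", so the double dash produces an empty field and toolchain, target and cflags land at fixed indices. A minimal sketch of the behaviour (the configuration string below is a made-up example):

    str="tcwg_bmk-code_speed-cpu2017rate--llvm-aarch64-master-O2_LTO"
    IFS=- read -r -a f <<< "$str"
    # f: tcwg_bmk code_speed cpu2017rate "" llvm aarch64 master O2_LTO
    echo "toolchain=${f[4]} target=${f[5]} cflags=-${f[7]}"
    # "_" separates compiler options and "LTO" is shorthand for flto:
    echo "-${f[7]}" | sed -e "s/_/ -/g" -e "s/LTO/flto/g"    # -> "-O2 -flto"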
case "${rr[target]}" in "arm_eabi") gcc_override_configure+=("--set" "gcc_override_configure=--disable-multilib" - "--set" "gcc_override_configure=--with-cpu=cortex-m4" "--set" "gcc_override_configure=--with-mode=thumb" "--set" "gcc_override_configure=--with-float=hard" ) ;; esac + +rr[cpu]=$(tcwg_bmk_cpu) +if [ "${rr[cpu]}" != "" ]; then + gcc_override_configure+=("--set" "gcc_override_configure=--with-cpu=${rr[cpu]}") + cflags="$cflags -mcpu=${rr[cpu]}" +fi + case "${rr[toolchain]}" in llvm) - rr[components]="binutils gcc linux glibc llvm" ;; + rr[components]="llvm" ;; gnu) rr[components]="binutils gcc linux glibc" ;; gnu_eabi) @@ -137,24 +129,19 @@ trap print_traceback EXIT default_start_at="" default_finish_at="" case "${rr[mode]}" in - "baseline") - default_finish_at="update_baseline" - ;; "bisect") single_updated_component="$(print_single_updated_component)" case $single_updated_component in binutils) default_start_at="build_abe-binutils" ;; gcc) default_start_at="build_abe-stage1" ;; linux|glibc) default_start_at="clean_sysroot" ;; - llvm) default_start_at="build_llvm-true" ;; + llvm) default_start_at="build_bmk_llvm" ;; newlib) default_start_at="build_abe-newlib" ;; *) assert_with_msg \ - "Invalid single updated component \"$single_updated_component\"" false - ;; + "Invalid single updated component \"$single_updated_component\"" false + ;; esac - default_finish_at="check_regression" ;; - "jenkins-full") ;; esac if [ x"$start_at" = x"default" ]; then start_at="$default_start_at" @@ -163,15 +150,52 @@ if [ x"$finish_at" = x"default" ]; then finish_at="$default_finish_at" fi +case "${rr[ci_project]}/${rr[ci_config]}" in + tcwg_bmk-code_speed-cpu2017rate/gnu-aarch64-master-O2|\ + tcwg_bmk-code_speed-cpu2017rate/gnu-aarch64-master-O3|\ + tcwg_bmk-code_speed-cpu2017rate/llvm-aarch64-master-O2|\ + tcwg_bmk-code_speed-cpu2017rate/llvm-aarch64-master-O3) + rr[major]=3 + rr[minor]=0 + ;; + *) + rr[major]=2 + rr[minor]=3 + ;; +esac + run_step_init "$start_at" "$finish_at" "${rr[top_artifacts]}" "$verbose" +build_bmk_llvm () +{ + ( + set -euf -o pipefail + + local projects="clang;lld;openmp" + case "${rr[target]}" in + aarch64) + # Flang is not supported for AArch32 + projects="$projects;flang" + ;; + esac + + build_llvm "$projects" "" "${rr[metric_id]}" + + # Copy shared libraries to runtime sysroot dir + mkdir -p llvm-install/libc + rsync -a --del --include "*/" --include "*.so*" --exclude "*" \ + --delete-excluded llvm-install/lib/ llvm-install/libc/lib/ + ) +} + benchmark () { + obligatory_variables ssh_host ssh_port + ( set -euf -o pipefail - local bmk_cflags="$2" - local results_id_file="$3" + local bmk_flags="$2" sanity_check_pwd @@ -179,740 +203,327 @@ benchmark () rm -rf "$(pwd)"/bin mkdir "$(pwd)"/bin - local bmk_flags bmk_ldflags reboot run_profile - bmk_flags="$(echo $bmk_cflags | sed -e "s/_/ -/g" -e "s/LTO/flto/g" \ - -e "s/VECT/fdump-tree-vect-details/g")" - case "$bmk_cflags" in - "-Os"*|"-Oz"*) + local reboot run_profile + + local hw_tag + hw_tag=$(tcwg_bmk_hw) + + case "${rr[ci_project]}" in + *_size*|*_vect*|*_sve*) reboot=false run_profile="parallel" + testmode="verify" ;; *) reboot=true run_profile="serial" + testmode="benchmark" ;; esac - local bench_list bin cc gnu_host gnu_target sysroot toolchain - gnu_host=$(print_gnu_target native) - gnu_target=$(print_gnu_target ${rr[target]}) - sysroot="$(pwd)/abe/builds/destdir/$gnu_host/$gnu_target/libc" + local bench_list bin cc sysroot toolchain case "${rr[toolchain]}" in llvm) - local llvm_target - llvm_target=$(echo 
"$gnu_target" | sed -e "s/^arm-/armv7a-/") - bmk_flags="$bmk_flags --target=$llvm_target --sysroot=$sysroot" - bmk_ldflags="$bmk_flags" - # Use LLD for LLVM configurations. - # Also, BFD linker crashes for AArch32 LTO builds, - # see https://projects.linaro.org/browse/LLVM-562 . - case "$bmk_ldflags" in - *"-fuse-ld="*) ;; - *) bmk_ldflags="$bmk_ldflags -fuse-ld=lld" ;; - esac + sysroot="$(pwd)/llvm-install/libc" bin="$(pwd)/llvm-install/bin" cc="$bin/" toolchain="llvm" ;; gnu|gnu_eabi) - bmk_ldflags="$bmk_flags" + local gnu_host gnu_target + gnu_host=$(print_gnu_target native) + gnu_target=$(print_gnu_target ${rr[target]}) + sysroot="$(pwd)/abe/builds/destdir/$gnu_host/$gnu_target/libc" bin="$(pwd)/abe/builds/destdir/$gnu_host/bin" cc="$bin/$gnu_target-" toolchain="gnu" + # Append -fdump-statistics-asmname to obtain compile time metrics. + bmk_flags="$bmk_flags -fdump-statistics-asmname -fdump-tree-vect-details" ;; esac - case "${rr[toolchain]}:${benchmarks[*]}" in - llvm:spec2k6) bench_list="c_and_cxx" ;; - gnu:spec2k6) bench_list="all" ;; - llvm:spec2017) bench_list="spec2017_speed_nofortran" ;; - gnu:spec2017) bench_list="spec2017_speed" ;; - *) bench_list="${benchmarks[*]}" ;; - esac + + bench_list="$(tcwg_bmk_benchs)" + # shellcheck disable=SC2154 sysroot="ssh://$ssh_host:$ssh_port:$sysroot" - local hw_tag - case "${rr[ci_project]}:${rr[target]}" in - *_apm_32*:*) hw_tag=apm_32 ;; - *_apm_64*:*) hw_tag=apm_64 ;; - *_apm*:arm*) hw_tag=apm_32 ;; - *_apm*:aarch64) hw_tag=apm_64 ;; - *_sq_32*:*) hw_tag=sq_32 ;; - *_sq_64*:*) hw_tag=sq_64 ;; - *_sq*:arm*) hw_tag=sq_32 ;; - *_sq*:aarch64) hw_tag=sq_64 ;; - *_tk1_32*:*) hw_tag=tk1_32 ;; - *_tk1*:arm*) hw_tag=tk1_32 ;; - *_tx1_32*:*) hw_tag=tx1_32 ;; - *_tx1_64*:*) hw_tag=tx1_64 ;; - *_tx1*:arm*) hw_tag=tx1_32 ;; - *_tx1*:aarch64) hw_tag=tx1_64 ;; - *_stm32*:arm*) hw_tag=stm32 ;; - *) echo "ERROR: Unknown hw_tag for ${rr[ci_project]}:${rr[target]}"; exit 1 ;; - esac - local hw image_arch toolchain_proto + toolchain_proto=ssh case "$hw_tag" in - apm_32) hw=apm; image_arch=armhf ;; - apm_64) hw=apm; image_arch=arm64 ;; - sq_32) hw=sq; image_arch=armhf ;; - sq_64) hw=sq; image_arch=arm64 ;; - tk1_32) hw=tk1; image_arch=armhf ;; - tx1_32) hw=tx1; image_arch=armhf ;; - tx1_64) hw=tx1; image_arch=arm64 ;; stm32) hw=stm32; image_arch=amd64 # When running benchmarks on stm32, we prefer to rsync the # toolchain to the board's host machine -- dev-02.tcwglab. toolchain_proto=rsync ;; + *_32) hw=${hw_tag/_32}; image_arch=armhf ;; + *_64) hw=${hw_tag/_64}; image_arch=arm64 ;; *) echo "ERROR: Unknown hw_tag $hw_tag"; exit 1 ;; esac - local results_id="$hw_tag/${rr[ci_project]}/${rr[mode]}-${rr[ci_config]}/@build_num@" + # Create directory for tcwg-benchmark to upload results to. + # Note that files inside $results_dir will be owned by tcwg-benchmark. + local results_dir + results_dir="$(mktemp -d)" + chmod 0777 "$results_dir" + # Trigger benchmarking job and capture its console output. + # Ignore exit code of the trigger command to detect various failure + # conditions from examining the console log. 
+    # Create directory for tcwg-benchmark to upload results to.
+    # Note that files inside $results_dir will be owned by tcwg-benchmark.
+    local results_dir
+    results_dir="$(mktemp -d)"
+    chmod 0777 "$results_dir"
+
+    # Trigger benchmarking job and capture its console output.
+    # Ignore exit code of the trigger command to detect various failure
+    # conditions from examining the console log.
     # shellcheck disable=SC2154
     remote_exec "ci.linaro.org:2222::-l $USER@linaro.org" \
-        build tcwg-benchmark -w \
+        build tcwg-benchmark -f -v \
        -p bmk_hw=$hw \
        -p bench_list="$bench_list" \
        -p cflags="$bmk_flags" \
-        -p ldflags="$bmk_ldflags" \
-        -p testmode=benchmark \
-        -p displaytag="${rr[ci_project]}/${rr[mode]}-${rr[ci_config]}" \
+        -p ldflags="$bmk_flags" \
+        -p testmode="$testmode" \
+        -p displaytag="${rr[ci_project]}/${rr[ci_config]}-${rr[mode]}" \
        -p ignore_errors=true \
        -p toolchain_url=$toolchain_proto://$ssh_host:$ssh_port:$cc \
        -p toolchain_type=$toolchain \
        -p sysroot="$sysroot" \
-        -p results_id="$results_id" \
+        -p results_dest="$ssh_host:$ssh_port:$results_dir" \
        -p reboot="$reboot" \
        -p run_profile="$run_profile" \
        -p image_arch="$image_arch" \
        ${scripts_branch+-p scripts_branch="$scripts_branch"} \
        ${bmk_branch+-p bmk_branch="$bmk_branch"} \
-        | tee $run_step_artifacts/benchmark-start.log
+        | tee $run_step_artifacts/benchmark-build.log || true
 
     local build_num
-    build_num=$(cat $run_step_artifacts/benchmark-start.log \
-                    | sed -e "s/.*#\([0-9]\+\).*/\1/")
+    build_num=$(head -n1 $run_step_artifacts/benchmark-build.log \
+                    | sed -e "s/Started.*#\([0-9]\+\).*/\1/")
     assert_with_msg "Benchmark build number should not be 0!" \
-        [ "$build_num" -gt "0" ]
+                    [ "$build_num" -gt "0" ]
+
+    cat > $run_step_artifacts/benchmark_job.txt << EOF
+Url: https://ci.linaro.org/job/tcwg-benchmark/$build_num
+Name: $(curl -s "https://ci.linaro.org/job/tcwg-benchmark/$build_num/api/json?tree=displayName" \
+        | jq -r ".displayName")
+EOF
 
     local build_status
     local build_ret
     while true; do
-        (remote_exec "ci.linaro.org:2222::-l $USER@linaro.org" \
-             console tcwg-benchmark -n 1 -f $build_num || true) \
-            | tee -a $run_step_artifacts/benchmark.log
-
        # Ssh connection to ci.linaro.org occasionally drops. We need
        # to check whether benchmarking has finished, and, if not, continue
-        # to watch its output.
-        build_status=$(tail -n 1 $run_step_artifacts/benchmark.log)
+        # waiting.
+        build_status=$(curl -s \
+            "https://ci.linaro.org/job/tcwg-benchmark/$build_num/api/json?tree=result" \
+            | jq -r ".result")
        case "$build_status" in
-            "Finished: SUCCESS")
+            "null")
+                # Continue waiting
+                true
+                ;;
+            "SUCCESS")
                build_ret=0
                break
                ;;
-            "Finished: "*)
-                echo "# Benchmarking infra is offline:" >> ${rr[top_artifacts]}/results
+            *)
+                echo "# Benchmarking infra is offline:" \
+                    >> ${rr[top_artifacts]}/results
                echo "-$EXTERNAL_FAIL" >> ${rr[top_artifacts]}/results
                build_ret=1
                break
                ;;
        esac
-        # Sleep a little to avoid flooding ci.linaro.org on transient ssh
-        # failures.
-        sleep 5
+        # Wait by following console output
+        (ssh -p2222 -l $USER@linaro.org ci.linaro.org \
+             console tcwg-benchmark $build_num -f || true) \
+            | tee $run_step_artifacts/benchmark-wait.log
    done
 
-    echo "$results_id" | sed -e "s/@build_num@/$build_num/g" \
-         > "$results_id_file"
+    rm -rf "${rr[top_artifacts]}/annex"
+    mkdir "${rr[top_artifacts]}/annex"
+    ln -s "$results_dir" "${rr[top_artifacts]}/annex/bmk-data"
+
     return $build_ret
     )
 }
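The wait loop above no longer scrapes "Finished: ..." lines from the console log; it asks the Jenkins JSON API, where "result" is null while a build is still running. A standalone sketch of the same polling idea (the function name and sleep interval are assumptions, not part of the script):

    poll_jenkins_result ()
    {
        local url="https://ci.linaro.org/job/tcwg-benchmark/$1"
        local status
        while true; do
            # jq -r prints JSON null as the literal string "null".
            status=$(curl -s "$url/api/json?tree=result" | jq -r ".result")
            [ "$status" != "null" ] && break
            sleep 60
        done
        echo "$status"    # SUCCESS, FAILURE, ABORTED, ...
    }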
 
-# Compare results obtained from perf data between $1 and $2
-# and generate results-compare.csv
-compare_results_perf ()
+# Exit with code 0 if no regression compared to base-artifacts/.
+no_regression_p ()
 {
     (
     set -euf -o pipefail
 
-    local exe_threshold symbol_threshold
-    case "${cflags[0]}" in
-        "-Os"*|"-Oz"*)
-            # We use 1% tolerance for binary size
-            # and 10% tolerance for symbol size.
-            exe_threshold=1
-            symbol_threshold=10
-            ;;
-        *)
-            # We use 3% tolerance for binary speed
-            # and 15% tolerance for symbol speed.
-            exe_threshold=3
-            symbol_threshold=15
-            # Reduce thresholds when bisecting to avoid considering borderline
-            # regressions as spurious. This should break cycles of build and
-            # bisect jobs triggering each other on borderline regressions.
-            if [ x"${rr[mode]}" = x"bisect" ]; then
-                exe_threshold=2
-                symbol_threshold=10
-            fi
-            ;;
-    esac
-
-    local -a arr
-    local metric bmk symbol rtime rsize time1 time2 size1 size2
-    local regression short_symbol short_regression
-    local result prev_bmk
-    echo "bmk,symbol,result" > $run_step_artifacts/results-compare.csv
-    printf "extra_build_params=" > $run_step_artifacts/extra-bisect-params
-
-    assert_with_msg "Found stale regression files" \
-        [ x"$(find $run_step_artifacts/ -name "*.regression" | wc -l)" = x"0" ]
-
-    local metric_id regressed_by
-    case "${cflags[0]}" in
-        "-Os"*|"-Oz"*)
-            metric_id="size"
-            regressed_by="grew in size by"
-            ;;
-        *)
-            metric_id="time"
-            regressed_by="slowed down by"
-            ;;
-    esac
-
-    # Read result lines from <(tail -n +2 ...) below.
-    # "-n +2" is to skip the header line.
-    prev_bmk=""
-    while IFS=, read -a arr; do
-        bmk=${arr[0]}
-        symbol=${arr[1]}
-        rtime=${arr[2]}
-        rsize=${arr[3]}
-        time1=${arr[4]}
-        time2=${arr[5]}
-        size1=${arr[6]}
-        size2=${arr[7]}
-
-        case $metric_id in
-            size) metric=$rsize ;;
-            time) metric=$rtime ;;
-            *) assert false ;;
-        esac
-
-        # Skip case where we have no info ("n/a")
-        if [ "$metric" != "n/a" ]; then
-            metric=$(($metric - 100))
-            # Remove padding from the tail of $symbol (padding is added by
-            # csvs2table.py for better formatting).
-            short_symbol="$(echo "$symbol" | sed -e "s/ *\$//")"
-
-            local bmk_exe
-            case "$short_symbol" in
-                "["*) bmk_exe=false ;;
-                *"_base.default") bmk_exe=true ;;
-                *) bmk_exe=false ;;
-            esac
-
-            local threshold
-            if $bmk_exe; then
-                threshold=$exe_threshold
-            else
-                threshold=$symbol_threshold
-            fi
-
-            if ! [ "$metric" -le "$threshold" ]; then
-                result=100
-
-                case $metric_id in
-                    size)
-                        short_regression="$regressed_by ${metric}%"
-                        regression="$short_regression from $size1 to $size2 bytes"
-                        ;;
-                    time)
-                        short_regression="$regressed_by ${metric}%"
-                        regression="$short_regression from $time1 to $time2 perf samples" ;;
-                    *) assert false ;;
-                esac
-                if $bmk_exe; then
-                    short_regression="$bmk $short_regression"
-                    regression="$bmk $regression"
-                    # Detect magic sample counts that indicate failure to build
-                    # and failure to run
-                    case "$time2" in
-                        888888888)
-                            short_regression="$bmk failed to run correctly"
-                            regression="$short_regression"
-                            ;;
-                        999999999)
-                            short_regression="$bmk failed to build"
-                            regression="$short_regression"
-                            ;;
-                    esac
-                    echo "$metric,$bmk,$symbol,$short_regression,$regression" >> $run_step_artifacts/exe.regressions
-                else
-                    short_regression="$bmk:$short_symbol $short_regression"
-                    regression="$bmk:$short_symbol $regression"
-                    echo "$metric,$bmk,$symbol,$short_regression,$regression" >> $run_step_artifacts/$bmk.regression
-                fi
-                if [ x"$bmk" != x"$prev_bmk" ]; then
-                    printf "++benchmarks %s " $bmk >> $run_step_artifacts/extra-bisect-params
-                    prev_bmk="$bmk"
-                fi
-            else
-                result=1
-            fi
-            echo "$bmk,$symbol,$result" >> $run_step_artifacts/results-compare.csv
-        fi
-    done < <(tail -n +2 $run_step_artifacts/results.csv)
-    printf "\n" >> $run_step_artifacts/extra-bisect-params
-
-    # Comparison is done. Below we generate regression report.
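The removed loop worked on relative columns where 100 means "equal to baseline", so subtracting 100 yields the percentage delta that is checked against the tolerance. In outline (values are illustrative):

    rel=103                  # new/old * 100, as read from results.csv
    delta=$((rel - 100))     # 3% worse than baseline
    threshold=3              # exe_threshold for speed runs
    if [ "$delta" -le "$threshold" ]; then
        echo "within tolerance"
    else
        echo "regression"
    fi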
-    cat > $run_step_artifacts/jira-body.txt <<EOF
-After \$COMMIT_COMPONENT \$COMMIT_LOG
-EOF
-    if [ -f $run_step_artifacts/exe.regressions ]; then
-        sort -gr -o $run_step_artifacts/exe.regressions \
-             $run_step_artifacts/exe.regressions
-
-        cat >> $run_step_artifacts/jira-body.txt <<EOF
-
-the following benchmarks $regressed_by more than ${exe_threshold}%:
-EOF
-        local exe
-        while IFS=, read metric exe symbol short_regression regression; do
-            cat >> $run_step_artifacts/jira-body.txt <<EOF
-- $regression
-EOF
-            if [ -f $run_step_artifacts/$exe.regression ]; then
-                while IFS=, read metric bmk symbol short_regression regression; do
-                    cat >> $run_step_artifacts/jira-body.txt <<EOF
-  - $regression
-EOF
-                done < $run_step_artifacts/$exe.regression
-                # Delete $bmk.regressions so that it doesn't show up
-                # in symbol-regression loop below.
-                rm $run_step_artifacts/$exe.regression
-            fi
-        done < $run_step_artifacts/exe.regressions
-    fi
-
-    find $run_step_artifacts/ -name "*.regression" -print0 | xargs -0 cat \
-        | sort -gr -o $run_step_artifacts/symbol.regressions
-    if [ x"$(cat $run_step_artifacts/symbol.regressions)" = x"" ]; then
-        # Delete empty file
-        rm $run_step_artifacts/symbol.regressions
-    fi
+    # check score-based regression
+    no_build_regression_p "$@"
 
-    if [ -f $run_step_artifacts/symbol.regressions ]; then
-        cat >> $run_step_artifacts/jira-body.txt <<EOF
+    # At this stage, there's no score-based regression.
+    # We are now checking metric-based regression.
 
-the following hot functions $regressed_by more than ${symbol_threshold}% (but their benchmarks $regressed_by less than ${exe_threshold}%):
-EOF
-        while IFS=, read metric bmk symbol short_regression regression; do
-            cat >> $run_step_artifacts/jira-body.txt <<EOF
-- $regression
-EOF
-        done < $run_step_artifacts/symbol.regressions
-    fi
+    assert_with_msg "Benchmarking succeeded, but bmk-data is missing" \
+                    [ -e $run_step_top_artifacts/annex/bmk-data ]
 
-    cp $run_step_artifacts/jira-body.txt $run_step_artifacts/mail-body.txt
+    # Make sure there is no stray results.regression file, which we use
+    # as failure marker.
+    assert ! [ -f $run_step_artifacts/results.regressions ]
 
-    local bmk_suite="" publish_save_temps=false
-    case "${benchmarks[*]}" in
-        coremark) bmk_suite="EEMBC CoreMark" ;;
-        spec2k6|4*)
-            bmk_suite="SPEC CPU2006"
-            publish_save_temps=true
-            ;;
-        spec2017|5*|6*)
-            bmk_suite="SPEC CPU2017"
-            publish_save_temps=true
-            ;;
+    local compare_opts=""
+    case "${rr[target]}:$cflags" in
+        "arm_eabi":*) compare_opts="--has_perf_logs no" ;;
+        *) compare_opts="" ;;
     esac
 
-    cat >> $run_step_artifacts/mail-body.txt <<EOF
-
-Below reproducer instructions can be used to re-build both "first_bad" and "last_good" cross-toolchains used in this bisection. Naturally, the scripts will fail when triggering benchmarking jobs if you don't have access to Linaro TCWG CI.
-EOF
-
-    # Copy save-temps tarballs to artifacts, so that they are accessible.
-    # We can publish pre-processed source only for benchmarks derived from
-    # open-source projects.
-    # Note that we include save-temps artifacts for successful builds so that
-    # "last_good" build has the artifacts.
-    if $publish_save_temps; then
-        mkdir -p $run_step_artifacts/top-artifacts
-        local s_t
-        while read s_t; do
-            rsync -a "$s_t/" $run_step_artifacts/top-artifacts/save-temps/
-        done < <(find results-1 -type d -name "save.*.temps")
+    if [ -f /usr/lib/linux-tools/install-armhf-perf-workaround.sh ]; then
+        # FIXME:
+        # In some cases perf report crashes when run from armhf container on
+        # ARMv8 machine.
+        # Install a workaround while we are investigating the cause.
+        sudo /usr/lib/linux-tools/install-armhf-perf-workaround.sh
     fi
 
-    if [ -d $run_step_artifacts/top-artifacts/save-temps/ ]; then
-        cat >> $run_step_artifacts/mail-body.txt <<EOF
-
-For your convenience, we have uploaded tarballs with pre-processed source and assembly files at:
-- First_bad save-temps: \$FIRST_BAD_ARTIFACTS/save-temps/
-- Last_good save-temps: \$LAST_GOOD_ARTIFACTS/save-temps/
-- Baseline save-temps: \$BASELINE_ARTIFACTS/save-temps/
-EOF
-    fi
+    local new_results="${rr[top_artifacts]}/annex/bmk-data"
+    local ref_results="base-artifacts/annex/bmk-data"
 
-    local compiler="" libc="" linker="" version="" target="" bmk_flags="" hw=""
-    case "${rr[toolchain]}" in
-        gnu)
-            compiler="GCC"
-            libc="Glibc"
-            linker="GNU Linker"
-            ;;
-        gnu_eabi)
-            compiler="GCC"
-            libc="Newlib"
-            linker="GNU LD"
-            ;;
-        llvm)
-            compiler="Clang"
-            libc="Glibc"
-            linker="LLVM Linker"
-            ;;
-    esac
-    case "${rr[ci_config]}" in
-        *-master-*) version="tip of trunk" ;;
-        *-release-*) version="latest release branch" ;;
-    esac
-    target=$(print_gnu_target ${rr[target]})
-    bmk_flags=$(echo "${cflags[0]}" | sed -e "s/_/ -/g" -e "s/LTO/flto/g" \
-                    -e "s/VECT/fdump-tree-vect-details/g")
-    case "${rr[ci_project]}" in
-        *_apm*) hw="APM Mustang 8x X-Gene1" ;;
-        *_tk1*) hw="NVidia TK1 4x Cortex-A15" ;;
-        *_tx1*) hw="NVidia TX1 4x Cortex-A57" ;;
-        *_stm32*) hw="STMicroelectronics STM32L476RGTx 1x Cortex-M4" ;;
-    esac
+    assert_with_msg "Benchmarking succeeded, but no annex/bmk-data results" \
+                    [ -d "$new_results" ]
 
-    cat >> $run_step_artifacts/mail-body.txt <<EOF
-
-Configuration:
-- Benchmark: $bmk_suite
-- Toolchain: $compiler + $libc + $linker
-- Version: all components were built from their $version
-- Target: $target
-- Compiler flags: $bmk_flags
-- Hardware: $hw
-
-This benchmarking CI is work-in-progress, and we welcome feedback and suggestions at linaro-toolchain@lists.linaro.org . In our improvement plans is to add support for SPEC CPU2017 benchmarks and provide "perf report/annotate" data behind these reports.
-EOF
-
-    # Generate mail subject
-    if [ -f $run_step_artifacts/exe.regressions ]; then
-        IFS=, read metric bmk symbol short_regression regression \
-            < <(head -n1 $run_step_artifacts/exe.regressions)
-    elif [ -f $run_step_artifacts/symbol.regressions ]; then
-        IFS=, read metric bmk symbol short_regression regression \
-            < <(head -n1 $run_step_artifacts/symbol.regressions)
-    else
-        # Exit with no regressions
-        return 0
+    if ! [ -d "$ref_results" ]; then
+        # base-artifacts has no reference results.
+        # This can happen on init build (update_baseline=init).
+        # In such cases we compare results to themselves just as an exercise.
+ ref_results="$new_results" + assert_with_msg "No reference results" \ + [ "${rr[update_baseline]}" = "init" ] fi - cat > $run_step_artifacts/mail-subject.txt <<EOF -[TCWG CI] $short_regression after \$COMMIT_COMPONENT: \$COMMIT_SUBJECT -EOF - cat $run_step_artifacts/jira-body.txt \ - | sed -e "s/^/# /" > $run_step_artifacts/results.regressions - echo "# $short_regression" >> $run_step_artifacts/results.regressions - ) -} - -compare_results_vect () -{ - ( - set -euf -o pipefail - echo "bmk,symbol,result" > $run_step_artifacts/results-compare.csv - - while IFS=, read -a arr; do - bmk=${arr[0]} - # hack to trim padding - symbol=$(echo ${arr[1]} | xargs) - base_num_vect_loops=${arr[3]} - target_num_vect_loops=${arr[4]} - if (( base_num_vect_loops > target_num_vect_loops )); then - echo "$bmk, $symbol, $base_num_vect_loops, $target_num_vect_loops" \ - >> $run_step_artifacts/results-compare.csv - fi - done < <(tail -n +2 $run_step_artifacts/results.csv) - ) -} - -compare_results () -{ - ( - set -euf -o pipefail - - local metric=$1 - local ref_results_id="$2" - local new_results_id="$3" - local cmp_options="$4" - - local results_ref results_new - results_ref=$(cat $ref_results_id) - results_new=$(cat $new_results_id) - - case "${rr[target]}" in - "arm_eabi") - cmp_options="$cmp_options --has_perf_logs no" - ;; - esac - + # Compare vs previous run + mkdir -p ${rr[top_artifacts]}/results-vs-prev + ln -s ../results-vs-prev $run_step_artifacts/results-vs-prev $scripts/tcwg-benchmark-results.sh \ - --results_ref $results_ref ++results $results_new \ - --top_artifacts "$run_step_artifacts" --verbose $verbose \ - --metric "$metric" $cmp_options \ - > $run_step_artifacts/results.log 2>&1 - - case $metric in - "perf") - compare_results_perf - ;; - "vect") - compare_results_vect - ;; - *) - echo "Invalid metric: $metric"; - exit 1 - ;; - esac - ) -} - -# Exit with code 0 if no new regressions between results_id-1 and -2 compared to -# regression between results_id-1 and -2 in base-artifacts/. -no_regression_vs_p () -{ - ( - set -euf -o pipefail - - local ref_artifacts=$1 - local new_artifacts=$2 - - # Check for build and correctness regressions. - no_build_regression_p "$@" - - # Generate ref-results-compare.csv. The value of "1" means that the result - # in the 2nd run is no worse than the result in the 1st run (as expected). - # The value of "100" means that the result in the 2nd run is worse than - # the result in the 1st run (unexpected). - # Note that we can grab previously-generated ref-results-compares.csv from - # base-artifacts/, but it could have been generated with an older version - # of scripts, so it's safer and more resilient to re-generate it from original - # perf data. - if [ ! -f "$ref_artifacts/results_id-1" ] || [ ! -f "$ref_artifacts/results_id-2" ]; then - return 0 + --results_ref "$ref_results" ++results "$new_results" \ + --top_artifacts "${rr[top_artifacts]}/results-vs-prev" \ + --verbose $verbose --hw_tag "$(tcwg_bmk_hw)" \ + $compare_opts \ + > ${rr[top_artifacts]}/results-vs-prev/tcwg-benchmark-results.log 2>&1 & + + local res + res=0 && wait $! || res=$? + if [ $res != 0 ]; then + return $EXTERNAL_FAIL fi - # <Workaround> missing reference results, which we have listed in - # tcwg-benchmark-results.broken-list. Once all entries referencing missing - # results are discarded, we'll remove this workaround. - # Otherwise compare_results will fail while fetching baseline results, - # and we'll consider this failure as a regression. 
- if cat "$scripts/tcwg-benchmark-results.broken-list" \ - | grep -q "^$(cat $ref_artifacts/results_id-1)\$\|^$(cat $ref_artifacts/results_id-2)\$"; then - return 0 - fi - # </Workaround> - compare_results "${rr[metric]}" "$ref_artifacts/results_id-1" "$ref_artifacts/results_id-2" \ - "--num_dsos 1 --num_symbols 0" - while IFS= read -r -d '' i - do - mv $i "$(dirname $i)"/ref-"$(basename $i)" - done < <(find $run_step_artifacts/ -type f -name "results*" -print0) + # Below call to output-bmk-results.py creates *.regression files. + assert_with_msg "Found stale regression files" \ + [ x"$(find $run_step_artifacts/ -name "*.regression" | wc -l)" = x"0" ] - # Similarly, generate new-results-compare.csv. - if [ ! -f "$new_artifacts/results_id-1" ] || [ ! -f "$new_artifacts/results_id-2" ]; then - return 1 - fi - compare_results "${rr[metric]}" "$new_artifacts/results_id-1" "$new_artifacts/results_id-2" \ - "--num_dsos 1 --num_symbols 0" - while IFS= read -r -d '' i - do - mv $i "$(dirname $i)"/new-"$(basename $i)" - done < <(find $run_step_artifacts/ -type f -name "results*" -print0) - - # Now compare the two reports. - # If "ref" has value of "100" (bad state), and "new" has value of "100" - # (also bad state), then we get no change, no regression, and final value - # of 100% * 100/100 == 100. - # - # If "ref" has value of "1" (good state), and "new" has value of "1" - # (also good state), then we get no change, no regression, and final value - # of 100% * 1/1 == 100. - # - # If "ref" has value of "100" (bad state), and "new" has value of "1" - # (good state), then we get a progression, and final value - # of 100% * 1/100 == 1. - # - # If "ref" has value of "1" (good state), and "new" has value of "100" - # (bad state), then we get a regression, and final value - # of 100% * 100/1 == 10000. We detect this below by comparing vs "5000". - $scripts/../bmk-scripts/csvs2table.py -p 0 --relative $run_step_artifacts/ref-results-compare.csv $run_step_artifacts/new-results-compare.csv > $run_step_artifacts/results-compare.csv - - local -a arr - local bmk symbol result status prev_bmk - local -a bisect_bmks - - # Read result lines from <(tail -n +2 ...) below. - # "-n +2" is to skip the header line. Set $status to "1" if there is - # a regression. - status=0 - prev_bmk="" - # Delete results.regressions generated by compare_results() calls above. - rm -f $run_step_artifacts/results.regressions - while IFS=, read -a arr; do - bmk=${arr[0]} - symbol=${arr[1]} - result=${arr[2]} - if ! [ "$result" -le "5000" ]; then - echo "# $bmk,$symbol regressed" >> $run_step_artifacts/results.regressions - status=1 - if [ x"$bmk" != x"$prev_bmk" ]; then - bisect_bmks+=("++benchmarks" "$bmk") - prev_bmk="$bmk" - fi + # Extract 5 most recent compare-results-vs-prev-internal.csv files from + # base-artifacts and compute std deviation out of them + local -a csvs_paths + csvs_paths=("results-vs-prev/compare-results-internal.csv" + "$(basename $run_step_artifacts)/compare-results-vs-prev-internal.csv") + + local -a history_csvs + local csv history_root="" + while read csv; do + if [ "$history_root" = "" ]; then + history_root="$csv" + continue + fi + + history_csvs+=("$csv") + done < <(get_git_history -0 base-artifacts "${csvs_paths[@]}") + + local csv tmpf + local -a compare_results_list=() + tmpf=$(mktemp) + + # FIXME: + # To deal with some differences along base-artifacts recent history + # - remove 'Failed for column' message from csv file + # - skip emtpy csv files. 
+
+    local csv tmpf
+    local -a compare_results_list=()
+    tmpf=$(mktemp)
+
+    # FIXME:
+    # To deal with some differences along base-artifacts recent history
+    # - remove 'Failed for column' message from csv file
+    # - skip empty csv files.
+    for csv in "${history_csvs[@]}"; do
+        grep -v 'Failed for column' "$csv" > "$tmpf" || true
+        cp "$tmpf" "$csv"
+        if [ -s "$csv" ]; then
+            compare_results_list+=("$csv")
        fi
-    done < <(tail -n +2 $run_step_artifacts/results-compare.csv)
-    echo "extra_build_params=${bisect_bmks[*]}" > $run_step_artifacts/extra-bisect-params
-    return $status
-    )
-}
+    done
 
-# Exit with code 0 if no regression compared to base-artifacts/.
-# Inspect build results ./results and performance results in ./results_id.
-no_regression_to_base_p ()
-{
-    (
-    set -euf -o pipefail
+    if [ ${#compare_results_list[@]} != 0 ]; then
+        $scripts/../bmk-scripts/compute-variability.py \
+            --inputs "${compare_results_list[@]}" ${rr[top_artifacts]}/results-vs-prev/compare-results-internal.csv \
+            --weights linear --method avg \
+            --output ${rr[top_artifacts]}/results-vs-prev/bmk-specific-variability-avg.csv || true
 
-    no_build_regression_p "$@"
+        $scripts/../bmk-scripts/compute-variability.py \
+            --inputs "${compare_results_list[@]}" ${rr[top_artifacts]}/results-vs-prev/compare-results-internal.csv \
+            --weights 2-peaks-linear --method max \
+            --output ${rr[top_artifacts]}/results-vs-prev/bmk-specific-variability-max.csv || true
+    fi
 
-    local ref_artifacts=$1
-    local new_artifacts=$2
+    rm -rf "$history_root" "$tmpf"
 
-    if ! [ -f "$ref_artifacts/results_id" ]; then
-        return 0
-    fi
-    # <Workaround> missing reference results, which we have listed in
-    # tcwg-benchmark-results.broken-list. Once all entries referencing missing
-    # results are discarded, we'll remove this workaround.
-    # Otherwise compare_results will fail while fetching baseline results,
-    # and we'll consider this failure as a regression.
-    if cat "$scripts/tcwg-benchmark-results.broken-list" \
-           | grep -q "^$(cat $ref_artifacts/results_id)\$"; then
-        return 0
-    fi
-    # </Workaround>
-    if ! [ -f "$new_artifacts/results_id" ]; then
-        return 1
-    fi
+    $scripts/../bmk-scripts/output-bmk-results.py \
+        --compare_results ${rr[top_artifacts]}/results-vs-prev/compare-results-internal.csv \
+        --variability_file ${rr[top_artifacts]}/results-vs-prev/bmk-specific-variability-avg.csv \
+        --variability_file_data "avg" \
+        --run_step_dir "$run_step_artifacts"/ \
+        --metric "${rr[metric_id]}" --mode "${rr[mode]}" \
+        --details quiet > $run_step_artifacts/output-bmk-results.log
 
-    # Make sure there is no stray results.regression file, which we use
-    # as failure marker.
-    # We can, potentially, call ${rr[no_regression_p]} several times in
-    # a row during update_baseline() step, but we should stop at the first
-    # regression. Therefore, we should never see results.regressions exist.
     assert ! [ -f $run_step_artifacts/results.regressions ]
 
-    local compare_opts=""
-    case "${cflags[0]}" in
-        *"_LTO"*) compare_opts="--num_symbols 0 --entry_threshold 10" ;;
-    esac
-    compare_results "${rr[metric]}" "$ref_artifacts/results_id" "$new_artifacts/results_id" "$compare_opts"
+    # copy inputs useful to build the mail / jira / .. to mail dir
+    for resfile in $run_step_artifacts/{exe,symbol}.{regression,improvement}; do
+        if [ -f $resfile ]; then
+            cp $resfile ${rr[top_artifacts]}/notify/
+        fi
+    done
 
+    # return status relies on the presence of the results.regressions file
     if [ -f $run_step_artifacts/results.regressions ]; then
+        assert_with_msg "Found a regression while comparing the build against itself" \
+                        [ "$ref_results" != "$new_results" ]
        return 1
    fi
 
    return 0
    )
 }
 
-# Implement rr[breakup_updated_components] hook.
-tcwg_bmk_breakup_updated_components ()
-{
-    (
-    set -euf -o pipefail
-
-    # Compiler changes tend to cause the most regressions.
-    # Breakup updated components into compiler and the rest of components
-    # to reduce the number of builds.
-    local cc
-    case "${rr[toolchain]}" in
-        llvm) cc="llvm" ;;
-        gnu|gnu_eabi) cc="gcc" ;;
-        *) assert false ;;
-    esac
-
-    if print_updated_components "\n" | grep -q "^$cc\$"; then
-        echo "$cc"
-        print_updated_components "\n" | grep -v "^$cc\$" | tr '\n' ' ' | sed -e "s/ \$//g"
-        echo
-    else
-        print_updated_components "\n"
-    fi
-    )
-}
-rr[breakup_updated_components]=tcwg_bmk_breakup_updated_components
-
-run_step stop_on_fail -10 reset_artifacts
-run_step stop_on_fail x prepare_abe
-run_step skip_on_fail -9 build_abe binutils
-run_step skip_on_fail -8 build_abe stage1 -- "${gcc_override_configure[@]}"
-run_step skip_on_fail x clean_sysroot
-case "${rr[components]}" in
-    *glibc*)
-        run_step skip_on_fail -7 build_abe linux
-        run_step skip_on_fail -6 build_abe glibc
+# Compiler changes tend to cause the most regressions.
+# Breakup updated components into compiler and the rest of components
+# to reduce the number of builds.
+case "${rr[toolchain]}" in
+    llvm)
+        rr[breakup_changed_components]="breakup_changed_components llvm"
        ;;
-    *newlib*)
-        run_step skip_on_fail -6 build_abe newlib
+    gnu|gnu_eabi)
+        rr[breakup_changed_components]="breakup_changed_components gcc"
        ;;
+    *) assert false ;;
 esac
 
-patch_branch=""
-if [ x"${rr[metric]}" = x"vect" ]; then
-    patch_branch="--patch linaro-local/vect-metric-branch"
-fi
-
-run_step skip_on_fail -5 build_abe stage2 -- $patch_branch "${gcc_override_configure[@]}"
-
+run_step stop_on_fail -10 reset_artifacts
 case "${rr[toolchain]}" in
-    llvm) run_step skip_on_fail -3 build_llvm true ;;
-esac
-case "${#cflags[@]}" in
-    2)
-        # Don't bisect benchmark build/run failures in *-vs-* configurations.
-        # Bisections happen only for regressions with build scores >=0,
-        # which will happen if benchmark "${cflags[1]}" succeeds.
-        run_step skip_on_fail -1 benchmark -- "${cflags[0]}" ${rr[top_artifacts]}/results_id-1
-        run_step skip_on_fail 0 benchmark -- "${cflags[1]}" ${rr[top_artifacts]}/results_id-2
-        # Set final "build" score to "1" for compatibility with older results
-        run_step skip_on_fail 1 true
-        rr[no_regression_p]=no_regression_vs_p
-        run_step reset_on_fail x check_regression
+    gnu*)
+        run_step stop_on_fail x prepare_abe
+        run_step skip_on_fail -9 build_abe binutils
+        run_step skip_on_fail -8 build_abe stage1 -- \
+                 "${gcc_override_configure[@]}"
+        run_step skip_on_fail x clean_sysroot
+        case "${rr[components]}" in
+            *glibc*)
+                run_step skip_on_fail -7 build_abe linux
+                run_step skip_on_fail -6 build_abe glibc
+                ;;
+            *newlib*)
+                run_step skip_on_fail -6 build_abe newlib
+                ;;
+        esac
+        run_step skip_on_fail -5 build_abe stage2 -- \
+                 "${gcc_override_configure[@]}"
        ;;
-    1)
-        # Bisect benchmark build/run failures in non-vs configurations.
-        # Set score to "0" with "true".
-        run_step skip_on_fail 0 true
-        run_step skip_on_fail 1 benchmark -- "${cflags[0]}" ${rr[top_artifacts]}/results_id
-        rr[no_regression_p]=no_regression_to_base_p
-        run_step reset_on_fail x check_regression
+    llvm)
+
+        run_step skip_on_fail -3 build_bmk_llvm
        ;;
 esac
-run_step stop_on_fail x update_baseline
-run_step stop_on_fail x push_baseline
+run_step skip_on_fail 1 benchmark -- "$cflags"
+run_step reset_on_fail x check_regression
 
 trap "" EXIT
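Net effect on the step sequence: gnu/gnu_eabi configurations run reset_artifacts -> prepare_abe -> build_abe binutils -> stage1 -> clean_sysroot -> linux + glibc (or newlib) -> stage2, while llvm configurations only need reset_artifacts -> build_bmk_llvm; both then share a single benchmark step with one "$cflags" value and a final check_regression, replacing the old one- and two-cflags ("-vs-") variants.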