diff options
Diffstat (limited to 'tcwg_bmk-build.sh')
-rwxr-xr-x | tcwg_bmk-build.sh | 813 |
1 files changed, 317 insertions, 496 deletions
diff --git a/tcwg_bmk-build.sh b/tcwg_bmk-build.sh index 87263a59..f1cf6c64 100755 --- a/tcwg_bmk-build.sh +++ b/tcwg_bmk-build.sh @@ -10,86 +10,66 @@ scripts=$(dirname $0) convert_args_to_variables "$@" -obligatory_variables rr[ci_project] rr[ci_config] ssh_host ssh_port +obligatory_variables rr[ci_project] rr[ci_config] +declare -A rr -# Execution mode: baseline, bisect, jenkins-full -# shellcheck disable=SC2154 -rr[mode]="${rr[mode]-baseline}" +# All bmk config about hw and benchs is implemented in this file +# shellcheck source=tcwg_bmk-config.sh +. $scripts/tcwg_bmk-config.sh + +# Execution mode: build or bisect +rr[mode]="${rr[mode]-build}" # Set custom revision for one of the projects, and use baseline revisions # for all other projects. -# shellcheck disable=SC2154 rr[baseline_branch]="${rr[baseline_branch]-linaro-local/ci/${rr[ci_project]}/${rr[ci_config]}}" -# shellcheck disable=SC2154 -rr[update_baseline]="${rr[update_baseline]-update}" -# shellcheck disable=SC2154 +rr[update_baseline]="${rr[update_baseline]-ignore}" rr[top_artifacts]="${rr[top_artifacts]-$(pwd)/artifacts}" -# Set metric to perf by default. -# shellcheck disable=SC2154 -rr[metric]="${rr[metric]-perf}" - -# {toolchain_name}-{toolchain_ver}-{target}-{bmk}-{cflags} -IFS=- read -a ci_config <<EOF -${rr[ci_config]} +# ${ci_project}--${ci_config} format is : +# 'tcwg_bmk-#{PROFILE_NAME}-#{BMK}--#{TOOLCHAIN}-#{TARGET}-{toolchain_ver}-{cflags}' +IFS=- read -a ci_pjt_cfg <<EOF +${rr[ci_project]}--${rr[ci_config]} EOF -# shellcheck disable=SC2154 -rr[toolchain]=${rr[toolchain]-${ci_config[0]}} -# shellcheck disable=SC2154 -rr[target]=${rr[target]-${ci_config[2]}} -benchmarks=("${benchmarks[@]-${ci_config[3]}}") -if [ x"${benchmarks[*]}" = x"default" ]; then - benchmarks=("${ci_config[3]}") -fi -if ! test_array cflags; then - ci_config=("${ci_config[@]:4}") - # In ${ci_config[@]} we now have "-"-separated entries (due to IFS=- above). - # We restore "-" in compiler flags when doing flags="$flags-$flag" below. - # We use "_" to separate compiler options, and it is translated to " -" - # in benchmark(). - cflags=() - while [ ${#ci_config[@]} -ge 1 ]; do - flags="" - while [ ${#ci_config[@]} -ge 1 ]; do - flag="${ci_config[0]}" - ci_config=("${ci_config[@]:1}") - if [ x"$flag" = x"vs" ]; then - break - fi - flags="$flags-$flag" - done - cflags+=("$flags") - done -fi + +rr[toolchain]=${rr[toolchain]-${ci_pjt_cfg[4]}} +rr[target]=${rr[target]-${ci_pjt_cfg[5]}} + +cflags="${cflags--${ci_pjt_cfg[7]}}" gcc_mode="" -for i in $(seq 0 $(("${#cflags[@]}" - 1))); do - cflags_mode="" - if [[ x"${cflags[$i]}" == x*"VECT"* ]]; then - rr[metric]="vect" - fi +case "${rr[target]}:$cflags" in + "arm:"*"mthumb"*) gcc_mode=thumb ;; + "arm:"*"marm"*) gcc_mode=arm ;; + "arm:-Os"*|"arm:-Oz"*) + gcc_mode=thumb + cflags="${cflags}_mthumb" + ;; + "arm:"*) + gcc_mode=arm + cflags="${cflags}_marm" + ;; + "arm_eabi:"*) + cflags="${cflags}_mthumb" + ;; +esac - case "${rr[target]}:${cflags[$i]}" in - "arm:"*"mthumb"*) cflags_mode=thumb ;; - "arm:"*"marm"*) cflags_mode=arm ;; - "arm:-Os"*|"arm:-Oz"*) - cflags_mode=thumb - cflags[$i]="${cflags[$i]}_mthumb" - ;; - "arm:"*) - cflags_mode=arm - cflags[$i]="${cflags[$i]}_marm" - ;; - "arm_eabi:"*) - cflags[$i]="${cflags[$i]}_mthumb" - ;; - esac - if [ x"$gcc_mode" = x"" ]; then - gcc_mode="$cflags_mode" - elif [ x"$gcc_mode" != x"$cflags_mode" ]; then - assert_with_msg "Unsupported arm/thumb configuration ${cflags[$(($i - 1))]} and ${cflags[$i]}" false - fi -done +cflags="$(echo $cflags | sed -e "s/_/ -/g" -e "s/LTO/flto/g")" + +case "${rr[ci_project]}" in + *-*_size-*) rr[metric_id]="size" ;; + *-*_speed-*) rr[metric_id]="sample" ;; + *-*_vect-*) rr[metric_id]="num_vect_loops" ;; + *-*_sve-*) rr[metric_id]="num_sve_loops" ;; + *) assert_with_msg "Cannot determine metric from ${rr[ci_project]}" false ;; +esac + +called_from_notify=${called_from_notify-false} + +hw=$(tcwg_bmk_hw) +hw=${hw%_32} ; hw=${hw%_64} + +# ----------------------------------------------------------------------- gcc_override_configure=() # Set default ARM/Thumb mode for AArch32 compiler. This ensures that libraries @@ -109,18 +89,23 @@ gcc_override_configure+=("--set" "gcc_override_configure=--disable-libsanitizer" # board type. case "${rr[target]}" in "arm_eabi") gcc_override_configure+=("--set" "gcc_override_configure=--disable-multilib" - "--set" "gcc_override_configure=--with-cpu=cortex-m4" "--set" "gcc_override_configure=--with-mode=thumb" "--set" "gcc_override_configure=--with-float=hard" ) ;; esac + +rr[cpu]=$(tcwg_bmk_cpu) +if [ "${rr[cpu]}" != "" ]; then + gcc_override_configure+=("--set" "gcc_override_configure=--with-cpu=${rr[cpu]}") + cflags="$cflags -mcpu=${rr[cpu]}" +fi + case "${rr[toolchain]}" in llvm) - # shellcheck disable=SC2154 - rr[components]="binutils gcc glibc llvm" ;; + rr[components]="llvm" ;; gnu) - rr[components]="binutils gcc glibc" ;; + rr[components]="binutils gcc linux glibc" ;; gnu_eabi) rr[components]="binutils gcc newlib" ;; *) assert_with_msg "Unknown toolchain \"${rr[toolchain]}\"" false ;; @@ -144,24 +129,19 @@ trap print_traceback EXIT default_start_at="" default_finish_at="" case "${rr[mode]}" in - "baseline") - default_finish_at="update_baseline" - ;; "bisect") single_updated_component="$(print_single_updated_component)" case $single_updated_component in binutils) default_start_at="build_abe-binutils" ;; gcc) default_start_at="build_abe-stage1" ;; - glibc) default_start_at="clean_sysroot" ;; - llvm) default_start_at="build_llvm-true" ;; + linux|glibc) default_start_at="clean_sysroot" ;; + llvm) default_start_at="build_bmk_llvm" ;; newlib) default_start_at="build_abe-newlib" ;; *) assert_with_msg \ - "Invalid single updated component \"$single_updated_component\"" false - ;; + "Invalid single updated component \"$single_updated_component\"" false + ;; esac - default_finish_at="check_regression" ;; - "jenkins-full") ;; esac if [ x"$start_at" = x"default" ]; then start_at="$default_start_at" @@ -170,33 +150,52 @@ if [ x"$finish_at" = x"default" ]; then finish_at="$default_finish_at" fi +case "${rr[ci_project]}/${rr[ci_config]}" in + tcwg_bmk-code_speed-cpu2017rate/gnu-aarch64-master-O2|\ + tcwg_bmk-code_speed-cpu2017rate/gnu-aarch64-master-O3|\ + tcwg_bmk-code_speed-cpu2017rate/llvm-aarch64-master-O2|\ + tcwg_bmk-code_speed-cpu2017rate/llvm-aarch64-master-O3) + rr[major]=3 + rr[minor]=0 + ;; + *) + rr[major]=2 + rr[minor]=3 + ;; +esac + run_step_init "$start_at" "$finish_at" "${rr[top_artifacts]}" "$verbose" -# If we bisect a regression between different major versions of Glibc, -# then we might get a mixed sysroot with several versions of ld-M.N.so and -# other binaries installed side-by-side. Such a sysroot will break -# benchmarking, which requires a single ld-*.so binary to be present. -# Forcefully delete sysroot before building C library. -clean_sysroot () +build_bmk_llvm () { ( set -euf -o pipefail - local gnu_target sysroot - gnu_target=$(print_gnu_target ${rr[target]}) - sysroot="$(pwd)/abe/builds/destdir/x86_64-pc-linux-gnu/$gnu_target/libc" + local projects="clang;lld;openmp" + case "${rr[target]}" in + aarch64) + # Flang is not supported for AArch32 + projects="$projects;flang" + ;; + esac + + build_llvm "$projects" "" "${rr[metric_id]}" - rm -rf "$sysroot" + # Copy shared libraries to runtime sysroot dir + mkdir -p llvm-install/libc + rsync -a --del --include "*/" --include "*.so*" --exclude "*" \ + --delete-excluded llvm-install/lib/ llvm-install/libc/lib/ ) } benchmark () { + obligatory_variables ssh_host ssh_port + ( set -euf -o pipefail - local bmk_cflags="$1" - local results_id_file="$3" + local bmk_flags="$2" sanity_check_pwd @@ -204,511 +203,333 @@ benchmark () rm -rf "$(pwd)"/bin mkdir "$(pwd)"/bin - local bmk_flags bmk_ldflags reboot run_profile - bmk_flags="$(echo $bmk_cflags | sed -e "s/_/ -/g" -e "s/LTO/flto/g")" - bmk_flags="$(echo $bmk_cflags | sed -e "s/_/ -/g" -e "s/VECT/fmetric-vect/g")" - case "$bmk_cflags" in - "-Os"*|"-Oz"*) + local reboot run_profile + + local hw_tag + hw_tag=$(tcwg_bmk_hw) + + case "${rr[ci_project]}" in + *_size*|*_vect*|*_sve*) reboot=false run_profile="parallel" + testmode="verify" ;; *) reboot=true run_profile="serial" + testmode="benchmark" ;; esac - local bench_list bin cc gnu_target sysroot toolchain - gnu_target=$(print_gnu_target ${rr[target]}) - sysroot="$(pwd)/abe/builds/destdir/x86_64-pc-linux-gnu/$gnu_target/libc" + local bench_list bin cc sysroot toolchain case "${rr[toolchain]}" in llvm) - local llvm_target - llvm_target=$(echo "$gnu_target" | sed -e "s/^arm-/armv7a-/") - bmk_flags="$bmk_flags --target=$llvm_target --sysroot=$sysroot" - bmk_ldflags="$bmk_flags" - # Use LLD for LLVM configurations. - # Also, BFD linker crashes for AArch32 LTO builds, - # see https://projects.linaro.org/browse/LLVM-562 . - case "$bmk_ldflags" in - *"-fuse-ld="*) ;; - *) bmk_ldflags="$bmk_ldflags -fuse-ld=lld" ;; - esac + sysroot="$(pwd)/llvm-install/libc" bin="$(pwd)/llvm-install/bin" cc="$bin/" toolchain="llvm" ;; gnu|gnu_eabi) - bmk_ldflags="$bmk_flags" - bin="$(pwd)/abe/builds/destdir/x86_64-pc-linux-gnu/bin" + local gnu_host gnu_target + gnu_host=$(print_gnu_target native) + gnu_target=$(print_gnu_target ${rr[target]}) + sysroot="$(pwd)/abe/builds/destdir/$gnu_host/$gnu_target/libc" + bin="$(pwd)/abe/builds/destdir/$gnu_host/bin" cc="$bin/$gnu_target-" toolchain="gnu" + # Append -fdump-statistics-asmname to obtain compile time metrics. + bmk_flags="$bmk_flags -fdump-statistics-asmname -fdump-tree-vect-details" ;; esac - case "${rr[toolchain]}:${benchmarks[@]}" in - llvm:spec2k6) bench_list="c_and_cxx" ;; - gnu:spec2k6) bench_list="all" ;; - llvm:spec2017) bench_list="spec2017_speed_nofortran" ;; - gnu:spec2017) bench_list="spec2017_speed" ;; - *) bench_list="${benchmarks[*]}" ;; - esac + + bench_list="$(tcwg_bmk_benchs)" + # shellcheck disable=SC2154 sysroot="ssh://$ssh_host:$ssh_port:$sysroot" - local hw_tag - case "${rr[ci_project]}:${rr[target]}" in - *_sq_32*:*) hw_tag=sq_32 ;; - *_sq_64*:*) hw_tag=sq_64 ;; - *_sq*:arm*) hw_tag=sq_32 ;; - *_sq*:aarch64) hw_tag=sq_64 ;; - *_tk1_32*:*) hw_tag=tk1_32 ;; - *_tk1*:arm*) hw_tag=tk1_32 ;; - *_tx1_32*:*) hw_tag=tx1_32 ;; - *_tx1_64*:*) hw_tag=tx1_64 ;; - *_tx1*:arm*) hw_tag=tx1_32 ;; - *_tx1*:aarch64) hw_tag=tx1_64 ;; - *_stm32*:arm*) hw_tag=stm32_STM32L476RGTx ;; - *) echo "ERROR: Unknown hw_tag for ${rr[ci_project]}:${rr[target]}"; exit 1 ;; - esac + local hw image_arch toolchain_proto - local hw image_arch + toolchain_proto=ssh case "$hw_tag" in - sq_32) hw=sq; image_arch=armhf ;; - sq_64) hw=sq; image_arch=arm64 ;; - tk1_32) hw=tk1; image_arch=armhf ;; - tx1_32) hw=tx1; image_arch=armhf ;; - tx1_64) hw=tx1; image_arch=arm64 ;; - stm32_STM32L476RGTx) hw=stm32; image_arch=armhf ;; + stm32) + hw=stm32; image_arch=amd64 + # When running benchmarks on stm32, we prefer to rsync the + # toolchain to the board's host machine -- dev-02.tcwglab. + toolchain_proto=rsync + ;; + *_32) hw=${hw_tag/_32}; image_arch=armhf ;; + *_64) hw=${hw_tag/_64}; image_arch=arm64 ;; *) echo "ERROR: Unknown hw_tag $hw_tag"; exit 1 ;; esac - local results_id="$hw_tag/${rr[ci_project]}/${rr[mode]}-${rr[ci_config]}/@build_num@" - - # When running benchmarks on stm32, we prefer to rsync the - # toolchain towards tcwg-bmk-stm32-01. - case "$hw" in - stm32) toolchain_proto=rsync ;; - *) toolchain_proto=ssh ;; - esac + # Create directory for tcwg-benchmark to upload results to. + # Note that files inside $results_dir will be owned by tcwg-benchmark. + local results_dir + results_dir="$(mktemp -d)" + chmod 0777 "$results_dir" + # Trigger benchmarking job and capture its console output. + # Ignore exit code of the trigger command to detect various failure + # conditions from examining the console log. # shellcheck disable=SC2154 remote_exec "ci.linaro.org:2222::-l $USER@linaro.org" \ - build tcwg-benchmark -w \ + build tcwg-benchmark -f -v \ -p bmk_hw=$hw \ -p bench_list="$bench_list" \ -p cflags="$bmk_flags" \ - -p ldflags="$bmk_ldflags" \ - -p testmode=benchmark \ - -p displaytag="${rr[ci_project]}/${rr[mode]}-${rr[ci_config]}" \ + -p ldflags="$bmk_flags" \ + -p testmode="$testmode" \ + -p displaytag="${rr[ci_project]}/${rr[ci_config]}-${rr[mode]}" \ -p ignore_errors=true \ -p toolchain_url=$toolchain_proto://$ssh_host:$ssh_port:$cc \ -p toolchain_type=$toolchain \ -p sysroot="$sysroot" \ - -p results_id="$results_id" \ + -p results_dest="$ssh_host:$ssh_port:$results_dir" \ -p reboot="$reboot" \ -p run_profile="$run_profile" \ -p image_arch="$image_arch" \ ${scripts_branch+-p scripts_branch="$scripts_branch"} \ ${bmk_branch+-p bmk_branch="$bmk_branch"} \ - | tee $run_step_artifacts/benchmark-start.log + | tee $run_step_artifacts/benchmark-build.log || true local build_num - build_num=$(cat $run_step_artifacts/benchmark-start.log \ - | sed -e "s/.*#\([0-9]\+\).*/\1/") + build_num=$(head -n1 $run_step_artifacts/benchmark-build.log \ + | sed -e "s/Started.*#\([0-9]\+\).*/\1/") assert_with_msg "Benchmark build number should not be 0!" \ - [ "$build_num" -gt "0" ] + [ "$build_num" -gt "0" ] + + cat > $run_step_artifacts/benchmark_job.txt << EOF +Url: https://ci.linaro.org/job/tcwg-benchmark/$build_num +Name: $(curl -s "https://ci.linaro.org/job/tcwg-benchmark/$build_num/api/json?tree=displayName" \ + | jq -r ".displayName") +EOF local build_status local build_ret while true; do - (remote_exec "ci.linaro.org:2222::-l $USER@linaro.org" \ - console tcwg-benchmark -n 1 -f $build_num || true) \ - | tee -a $run_step_artifacts/benchmark.log - # Ssh connection to ci.linaro.org occasionally drops. We need # to check whether benchmarking has finished, and, if not, continue - # to watch its output. - build_status=$(tail -n 1 $run_step_artifacts/benchmark.log) + # waiting. + build_status=$(curl -s \ + "https://ci.linaro.org/job/tcwg-benchmark/$build_num/api/json?tree=result" \ + | jq -r ".result") case "$build_status" in - "Finished: SUCCESS") + "null") + # Continue waiting + true + ;; + "SUCCESS") build_ret=0 break ;; - "Finished: "*) - echo "# Benchmarking infra is offline:" >> ${rr[top_artifacts]}/results + *) + echo "# Benchmarking infra is offline:" \ + >> ${rr[top_artifacts]}/results echo "-$EXTERNAL_FAIL" >> ${rr[top_artifacts]}/results build_ret=1 break ;; esac - # Sleep a little to avoid flooding ci.linaro.org on transient ssh - # failures. - sleep 5 + # Wait by following console output + (ssh -p2222 -l $USER@linaro.org ci.linaro.org \ + console tcwg-benchmark $build_num -f || true) \ + | tee $run_step_artifacts/benchmark-wait.log done - echo "$results_id" | sed -e "s/@build_num@/$build_num/g" \ - > "$results_id_file" - return $build_ret - ) -} + rm -rf "${rr[top_artifacts]}/annex" + mkdir "${rr[top_artifacts]}/annex" + ln -s "$results_dir" "${rr[top_artifacts]}/annex/bmk-data" -# Compare results obtained from perf data between $1 and $2 -# and generate results-compare.csv -compare_results_perf () -{ - ( - set -euf -o pipefail - - case "${cflags[0]}" in - "-Os"*|"-Oz"*) - # We use 1% tolerance for binary size - # and 10% tolerance for symbol size. - exe_threshold=101 - symbol_threshold=110 - ;; - *) - # We use 3% tolerance for binary speed - # and 15% tolerance for symbol speed. - exe_threshold=103 - symbol_threshold=115 - # Reduce thresholds when bisecting to avoid considering borderline - # regressions as spurious. This should break cycles of build and - # bisect jobs triggering each other on borderline regressions. - if [ x"${rr[mode]}" = x"bisect" ]; then - exe_threshold=102 - symbol_threshold=110 - fi - ;; - esac - - local -a arr - local bmk symbol time size result prev_bmk - echo "bmk,symbol,result" > $run_step_artifacts/results-compare.csv - printf "extra_build_params=" > $run_step_artifacts/extra-bisect-params - - # Read result lines from <(tail -n +2 ...) below. - # "-n +2" is to skip the header line. - prev_bmk="" - while IFS=, read -a arr; do - bmk=${arr[0]} - symbol=${arr[1]} - time=${arr[2]} - size=${arr[3]} - case "${cflags[0]}" in - "-Os"*|"-Oz"*) metric="$size" ;; - *) metric="$time" ;; - esac - - # Skip case where we have no info ("n/a") - if [ "$metric" != "n/a" ]; then - # Remove padding from the tail of $symbol (padding is added by - # csvs2table.py for better formatting). - local short_symbol="${symbol%%[ ]*}" - case "$short_symbol" in - "["*) threshold=$symbol_threshold ;; - *"_base.default") threshold=$exe_threshold ;; - *) threshold=$symbol_threshold ;; - esac - if ! [ "$metric" -le "$threshold" ]; then - result=100 - echo "# $bmk,$symbol regressed by $metric" >> $run_step_artifacts/results.regressions - if [ x"$bmk" != x"$prev_bmk" ]; then - printf "++benchmarks %s " $bmk >> $run_step_artifacts/extra-bisect-params - prev_bmk="$bmk" - fi - else - result=1 - fi - echo "$bmk,$symbol,$result" >> $run_step_artifacts/results-compare.csv - fi - done < <(tail -n +2 $run_step_artifacts/results.csv) - printf "\n" >> $run_step_artifacts/extra-bisect-params + return $build_ret ) } -compare_results_vect () +# Exit with code 0 if no regression compared to base-artifacts/. +no_regression_p () { ( set -euf -o pipefail - echo "bmk,symbol,result" > $run_step_artifacts/results-compare.csv - - while IFS=, read -a arr; do - bmk=${arr[0]} - # hack to trim padding - symbol=$(echo ${arr[1]} | xargs) - base_num_vect_loops=${arr[3]} - target_num_vect_loops=${arr[4]} - if (( base_num_vect_loops > target_num_vect_loops )); then - echo "$bmk, $symbol, $base_num_vect_loops, $target_num_vect_loops" \ - >> $run_step_artifacts/results-compare.csv - fi - done < <(tail -n +2 $run_step_artifacts/results.csv) - ) -} -compare_results () -{ - ( - set -euf -o pipefail + # check score-based regression + no_build_regression_p "$@" - local metric=$1 - local ref_results_id="$2" - local new_results_id="$3" - local cmp_options="$4" + # At this stage, there's no score-based regression. + # We are now checking metric-based regression. - local results_ref results_new - results_ref=$(cat $ref_results_id) - results_new=$(cat $new_results_id) + assert_with_msg "Benchmarking succeeded, but bmk-data is missing" \ + [ -e $run_step_top_artifacts/annex/bmk-data ] - case "${rr[target]}" in - "arm_eabi") - cmp_options="$cmp_options --has_perf_logs no" - ;; - esac + # Make sure there is no stray results.regression file, which we use + # as failure marker. + assert ! [ -f $run_step_artifacts/results.regressions ] - $scripts/tcwg-benchmark-results.sh \ - --results_ref $results_ref ++results $results_new \ - --top_artifacts "$run_step_artifacts" --verbose $verbose \ - --metric "$metric" $cmp_options \ - > $run_step_artifacts/results.log 2>&1 - - case $metric in - "perf") - compare_results_perf - ;; - "vect") - compare_results_vect - ;; - *) - echo "Invalid metric: $metric"; - exit 1 - ;; + local compare_opts="" + case "${rr[target]}:$cflags" in + "arm_eabi":*) compare_opts="--has_perf_logs no" ;; + *) compare_opts="" ;; esac - ) -} -# Exit with code 0 if no new regressions between results_id-1 and -2 compared to -# regression between results_id-1 and -2 in base-artifacts/. -no_regression_vs_p () -{ - ( - set -euf -o pipefail - - local ref_artifacts=$1 - local new_artifacts=$2 - - # Check for build and correctness regressions. - no_build_regression_p "$@" - - # Generate ref-results-compare.csv. The value of "1" means that the result - # in the 2nd run is no worse than the result in the 1st run (as expected). - # The value of "100" means that the result in the 2nd run is worse than - # the result in the 1st run (unexpected). - # Note that we can grab previously-generated ref-results-compares.csv from - # base-artifacts/, but it could have been generated with an older version - # of scripts, so it's safer and more resilient to re-generate it from original - # perf data. - if [ ! -f "$ref_artifacts/results_id-1" ] || [ ! -f "$ref_artifacts/results_id-2" ]; then - return 0 + if [ -f /usr/lib/linux-tools/install-armhf-perf-workaround.sh ]; then + # FIXME: + # In some cases perf report crashes when run from armhf container on + # ARMv8 machine. + # Install a workaround while we are investigating the cause. + sudo /usr/lib/linux-tools/install-armhf-perf-workaround.sh fi - # <Workaround> missing reference results, which we have listed in - # tcwg-benchmark-results.broken-list. Once all entries referencing missing - # results are discarded, we'll remove this workaround. - # Otherwise compare_results will fail while fetching baseline results, - # and we'll consider this failure as a regression. - if cat "$scripts/tcwg-benchmark-results.broken-list" \ - | grep -q "^$(cat $ref_artifacts/results_id-1)\$\|^$(cat $ref_artifacts/results_id-2)\$"; then - return 0 - fi - # </Workaround> - compare_results "${rr[metric]}" "$ref_artifacts/results_id-1" "$ref_artifacts/results_id-2" \ - "--num_dsos 1 --num_symbols 0" - while IFS= read -r -d '' i - do - mv $i "$(dirname $i)"/ref-"$(basename $i)" - done < <(find $run_step_artifacts/ -type f -name "results*" -print0) + local new_results="${rr[top_artifacts]}/annex/bmk-data" + local ref_results="base-artifacts/annex/bmk-data" + + assert_with_msg "Benchmarking succeeded, but no annex/bmk-data results" \ + [ -d "$new_results" ] + + # Get the baseline results if necessary : + # Already done by reset-artifacts, but this might have been affected if a + # rewrite happened between reset-artifacts and the rest. Which is the case + # when running from a Jenkins project. + git_annex_download base-artifacts annex + + if ! [ -d "$ref_results" ]; then + # base-artifacts has no reference results. + # This can happen on init build (update_baseline=init). + # In such cases we compare results to themselves just as an exercise. + ref_results="$new_results" + assert_with_msg "No reference results" \ + [ "${rr[update_baseline]}" = "init" ] + fi - # Similarly, generate new-results-compare.csv. - if [ ! -f "$new_artifacts/results_id-1" ] || [ ! -f "$new_artifacts/results_id-2" ]; then - return 1 + # Compare vs previous run + mkdir -p ${rr[top_artifacts]}/results-vs-prev + ln -s ../results-vs-prev $run_step_artifacts/results-vs-prev + $scripts/tcwg-benchmark-results.sh \ + --results_ref "$ref_results" ++results "$new_results" \ + --top_artifacts "${rr[top_artifacts]}/results-vs-prev" \ + --verbose $verbose --hw_tag "$(tcwg_bmk_hw)" \ + $compare_opts \ + > ${rr[top_artifacts]}/results-vs-prev/tcwg-benchmark-results.log 2>&1 & + + local res + res=0 && wait $! || res=$? + if [ $res != 0 ]; then + return $EXTERNAL_FAIL fi - compare_results "${rr[metric]}" "$new_artifacts/results_id-1" "$new_artifacts/results_id-2" \ - "--num_dsos 1 --num_symbols 0" - while IFS= read -r -d '' i - do - mv $i "$(dirname $i)"/new-"$(basename $i)" - done < <(find $run_step_artifacts/ -type f -name "results*" -print0) - - # Now compare the two reports. - # If "ref" has value of "100" (bad state), and "new" has value of "100" - # (also bad state), then we get no change, no regression, and final value - # of 100% * 100/100 == 100. - # - # If "ref" has value of "1" (good state), and "new" has value of "1" - # (also good state), then we get no change, no regression, and final value - # of 100% * 1/1 == 100. - # - # If "ref" has value of "100" (bad state), and "new" has value of "1" - # (good state), then we get a progression, and final value - # of 100% * 1/100 == 1. - # - # If "ref" has value of "1" (good state), and "new" has value of "100" - # (bad state), then we get a regression, and final value - # of 100% * 100/1 == 10000. We detect this below by comparing vs "5000". - $scripts/../bmk-scripts/csvs2table.py -p 0 --relative $run_step_artifacts/ref-results-compare.csv $run_step_artifacts/new-results-compare.csv > $run_step_artifacts/results-compare.csv - - local -a arr - local bmk symbol result status prev_bmk - local -a bisect_bmks - - # Read result lines from <(tail -n +2 ...) below. - # "-n +2" is to skip the header line. Set $status to "1" if there is - # a regression. - status=0 - prev_bmk="" - # Delete results.regressions generated by compare_results() calls above. - rm -f $run_step_artifacts/results.regressions - while IFS=, read -a arr; do - bmk=${arr[0]} - symbol=${arr[1]} - result=${arr[2]} - if ! [ "$result" -le "5000" ]; then - echo "# $bmk,$symbol regressed" >> $run_step_artifacts/results.regressions - status=1 - if [ x"$bmk" != x"$prev_bmk" ]; then - bisect_bmks+=("++benchmarks" "$bmk") - prev_bmk="$bmk" - fi + + # Below call to output-bmk-results.py creates *.regression files. + assert_with_msg "Found stale regression files" \ + [ x"$(find $run_step_artifacts/ -name "*.regression" | wc -l)" = x"0" ] + + # Extract 5 most recent compare-results-vs-prev-internal.csv files from + # base-artifacts and compute std deviation out of them + local -a csvs_paths + csvs_paths=("results-vs-prev/compare-results-internal.csv" + "$(basename $run_step_artifacts)/compare-results-vs-prev-internal.csv") + + local -a history_csvs + local csv history_root="" + while read csv; do + if [ "$history_root" = "" ]; then + history_root="$csv" + continue + fi + + history_csvs+=("$csv") + done < <(get_git_history -0 base-artifacts "${csvs_paths[@]}") + + local csv tmpf + local -a compare_results_list=() + tmpf=$(mktemp) + + # FIXME: + # To deal with some differences along base-artifacts recent history + # - remove 'Failed for column' message from csv file + # - skip emtpy csv files. + for csv in "${history_csvs[@]}"; do + grep -v 'Failed for column' "$csv" > "$tmpf" || true + cp "$tmpf" "$csv" + if [ -s "$csv" ]; then + compare_results_list+=("$csv") fi - done < <(tail -n +2 $run_step_artifacts/results-compare.csv) - echo "extra_build_params=${bisect_bmks[*]}" > $run_step_artifacts/extra-bisect-params - return $status - ) -} + done -# Exit with code 0 if no regression compared to base-artifacts/. -# Inspect build results ./results and performance results in ./results_id. -no_regression_to_base_p () -{ - ( - set -euf -o pipefail + if [ ${#compare_results_list[@]} != 0 ]; then + $scripts/../bmk-scripts/compute-variability.py \ + --inputs "${compare_results_list[@]}" ${rr[top_artifacts]}/results-vs-prev/compare-results-internal.csv \ + --weights linear --method avg \ + --output ${rr[top_artifacts]}/results-vs-prev/bmk-specific-variability-avg.csv || true - no_build_regression_p "$@" + $scripts/../bmk-scripts/compute-variability.py \ + --inputs "${compare_results_list[@]}" ${rr[top_artifacts]}/results-vs-prev/compare-results-internal.csv \ + --weights 2-peaks-linear --method max \ + --output ${rr[top_artifacts]}/results-vs-prev/bmk-specific-variability-max.csv || true + fi - local ref_artifacts=$1 - local new_artifacts=$2 + rm -rf "$history_root" "$tmpf" - if ! [ -f "$ref_artifacts/results_id" ]; then - return 0 - fi - # <Workaround> missing reference results, which we have listed in - # tcwg-benchmark-results.broken-list. Once all entries referencing missing - # results are discarded, we'll remove this workaround. - # Otherwise compare_results will fail while fetching baseline results, - # and we'll consider this failure as a regression. - if cat "$scripts/tcwg-benchmark-results.broken-list" \ - | grep -q "^$(cat $ref_artifacts/results_id)\$"; then - return 0 - fi - # </Workaround> - if ! [ -f "$new_artifacts/results_id" ]; then - return 1 - fi + $scripts/../bmk-scripts/output-bmk-results.py \ + --compare_results ${rr[top_artifacts]}/results-vs-prev/compare-results-internal.csv \ + --variability_file ${rr[top_artifacts]}/results-vs-prev/bmk-specific-variability-avg.csv \ + --variability_file_data "avg" \ + --run_step_dir "$run_step_artifacts"/ \ + --metric "${rr[metric_id]}" --mode "${rr[mode]}" \ + --details quiet > $run_step_artifacts/output-bmk-results.log - # Make sure there is no stray results.regression file, which we use - # as failure marker. - assert ! [ -f $run_step_artifacts/results.regressions ] + # copy inputs useful to build the mail / jira / .. to mail dir + for resfile in $run_step_artifacts/{exe,symbol}.{regression,improvement}; do + if [ -f $resfile ]; then + cp $resfile ${rr[top_artifacts]}/notify/ + fi + done - local compare_opts="" - case "${cflags[0]}" in - *"_LTO"*) compare_opts="--num_symbols 0 --entry_threshold 10" ;; - esac - compare_results "${rr[metric]}" "$ref_artifacts/results_id" "$new_artifacts/results_id" "$compare_opts" + # return status rely on the presence of the results.regressions file if [ -f $run_step_artifacts/results.regressions ]; then + assert_with_msg "Found a regression while comparing the build against itself" \ + [ "$ref_results" != "$new_results" ] return 1 fi return 0 ) } -# Implement rr[breakup_updated_components] hook. -tcwg_bmk_breakup_updated_components () -{ - ( - set -euf -o pipefail - - # Compiler changes tend to cause the most regressions. - # Breakup updated components into compiler and the rest of components - # to reduce the number of builds. - local cc - case "${rr[toolchain]}" in - llvm) cc="llvm" ;; - gnu|gnu_eabi) cc="gcc" ;; - *) assert false ;; - esac - - if print_updated_components "\n" | grep -q "^$cc\$"; then - echo "$cc" - print_updated_components "\n" | grep -v "^$cc\$" | tr '\n' ' ' | sed -e "s/ \$//g" - echo - else - print_updated_components "\n" - fi - ) -} -# shellcheck disable=SC2154 -rr[breakup_updated_components]=tcwg_bmk_breakup_updated_components - -run_step stop_on_fail -10 reset_artifacts -run_step stop_on_fail x prepare_abe -run_step skip_on_fail -9 build_abe binutils -run_step skip_on_fail -8 build_abe stage1 -- "${gcc_override_configure[@]}" -run_step skip_on_fail x clean_sysroot -case "${rr[components]}" in - *glibc*) - run_step skip_on_fail -7 build_abe linux - run_step skip_on_fail -6 build_abe glibc +# Compiler changes tend to cause the most regressions. +# Breakup updated components into compiler and the rest of components +# to reduce the number of builds. +case "${rr[toolchain]}" in + llvm) + rr[breakup_changed_components]="breakup_changed_components llvm" ;; - *newlib*) - run_step skip_on_fail -6 build_abe newlib + gnu|gnu_eabi) + rr[breakup_changed_components]="breakup_changed_components gcc" ;; + *) assert false ;; esac -patch_branch="" -if [ x"${rr[metric]}" = x"vect" ]; then - patch_branch="--patch linaro-local/vect-metric-branch" -fi - -run_step skip_on_fail -5 build_abe stage2 -- $patch_branch "${gcc_override_configure[@]}" - +run_step stop_on_fail -10 reset_artifacts case "${rr[toolchain]}" in - llvm) run_step skip_on_fail -3 build_llvm true ;; -esac -case "${#cflags[@]}" in - 2) - # Don't bisect benchmark build/run failures in *-vs-* configurations. - # Bisections happen only for regressions with build scores >=0, - # which will happen if benchmark "${cflags[1]}" succeeds. - run_step skip_on_fail -1 benchmark "${cflags[0]}" -- ${rr[top_artifacts]}/results_id-1 - run_step skip_on_fail 0 benchmark "${cflags[1]}" -- ${rr[top_artifacts]}/results_id-2 - # Set final "build" score to "1" for compatibility with older results - run_step skip_on_fail 1 true - # shellcheck disable=SC2154 - rr[no_regression_p]=no_regression_vs_p - run_step reset_on_fail x check_regression + gnu*) + run_step stop_on_fail x prepare_abe + run_step skip_on_fail -9 build_abe binutils + run_step skip_on_fail -8 build_abe stage1 -- \ + "${gcc_override_configure[@]}" + run_step skip_on_fail x clean_sysroot + case "${rr[components]}" in + *glibc*) + run_step skip_on_fail -7 build_abe linux + run_step skip_on_fail -6 build_abe glibc + ;; + *newlib*) + run_step skip_on_fail -6 build_abe newlib + ;; + esac + run_step skip_on_fail -5 build_abe stage2 -- \ + "${gcc_override_configure[@]}" ;; - 1) - # Bisect benchmark build/run failures in non-vs configurations. - # Set score to "0" with "true". - run_step skip_on_fail 0 true - run_step skip_on_fail 1 benchmark "${cflags[0]}" -- ${rr[top_artifacts]}/results_id - rr[no_regression_p]=no_regression_to_base_p - run_step reset_on_fail x check_regression + llvm) + + run_step skip_on_fail -3 build_bmk_llvm ;; esac -run_step stop_on_fail x update_baseline -run_step stop_on_fail x push_baseline +run_step skip_on_fail 1 benchmark -- "$cflags" +run_step reset_on_fail x check_regression trap "" EXIT |