diff options
author | Laurent Alfonsi <laurent.alfonsi@linaro.org> | 2023-05-30 13:54:38 +0200 |
---|---|---|
committer | Laurent Alfonsi <laurent.alfonsi@linaro.org> | 2023-06-14 10:08:52 +0000 |
commit | 4fe33ced0edd10c418f8fa42a41f1cb5f78b1d43 (patch) | |
tree | d727ba33cf0989efeb9be26473c5dcab27218a65 /tcwg_bmk-build.sh | |
parent | b9163889304480d01479518b825605da6bdea0ea (diff) |
tcwg_bmk-build.sh: Use output-bmk-results output for bmk
Detailed changes:
1) output-bmk-results now outputs directly into $run_step_artifacts.
{exe,symbol}.{regression,improvement} and extra-bisect-params files
are all generated by output-bmk-results.py.
2) $bmk.symbols-regression/improvement files don't exist anymore.
All details are now in symbol.regression/improvement.
3) $bmk.symbols-skipped doesn't exist anymore
All skipped exe/symbols are now in any.skipped.
4) results-vs-prev/compare-results.csv is not generated anymore.
It seemed to be useless.
Change-Id: I4ef03ae52d97e42b14def96f1cc967d5ebf29123
Diffstat (limited to 'tcwg_bmk-build.sh')
-rwxr-xr-x | tcwg_bmk-build.sh | 250 |
1 files changed, 4 insertions, 246 deletions
diff --git a/tcwg_bmk-build.sh b/tcwg_bmk-build.sh index 295df6a7..bbe76f7c 100755 --- a/tcwg_bmk-build.sh +++ b/tcwg_bmk-build.sh @@ -426,6 +426,7 @@ compare_results () local -a compare_results_list=() tmpf=$(mktemp) + # FIXME: # To deal with some differences along base-artifacts recent history # - remove 'Failed for column' message from csv file # - skip emtpy csv files. @@ -445,263 +446,20 @@ compare_results () rm -rf "${history_csvs[0]}" "$tmpf" - # Temporarily store results of output-bmk-results.py into - # $run_step_artifacts/output-bmk-results. The intent is to monitor - # the results for few bmk runs and eventually replace comparison logic - # in this function. - local verbose_opt="" if $verbose; then verbose_opt="verbose" fi - mkdir -p "$run_step_artifacts"/output-bmk-results $scripts/../bmk-scripts/output-bmk-results.py \ ${rr[top_artifacts]}/results-vs-prev/compare-results-internal.csv \ $run_step_artifacts/bmk-specific-variability.csv \ - "$run_step_artifacts"/output-bmk-results \ - "${rr[mode]}" $verbose_opt || touch "$run_step_artifacts"/output-bmk-results/error_code-$? - - case "${rr[ci_project]}" in - *-code_vect-*) metric_id="vect" ;; - *-code_sve-*) metric_id="sve" ;; - *-code_size-*|*-fujitsu_size-*) metric_id="size" ;; - *-code_speed-*|*-fujitsu_speed-*) metric_id="time" ;; - esac - - local regressed_by improved_by changed_by exe_threshold symbol_threshold - - case $metric_id in - size) - # We use 1% tolerance for binary size - # and 10% tolerance for symbol size. - exe_threshold=1 - symbol_threshold=10 - regressed_by="grew in size by" - improved_by="reduced in size by" - ;; - time) - # We use 3% tolerance for binary speed - # and 15% tolerance for symbol speed. - exe_threshold=3 - symbol_threshold=15 - - # Reduce thresholds when bisecting to avoid considering borderline - # regressions as spurious. This should break cycles of build and - # bisect jobs triggering each other on borderline regressions. 
- if [ x"${rr[mode]}" = x"bisect" ]; then - exe_threshold=2 - symbol_threshold=10 - fi - - regressed_by="slowed down by" - improved_by="speeds up by" - ;; - vect|sve) - exe_threshold=0 - symbol_threshold=0 - regressed_by="reduced by" - improved_by="increased up by" - ;; - *) assert false ;; - esac - - local -a arr - local metric bmk symbol rtime rsize rvect rsve time1 time2 size1 size2 vect1 vect2 sve1 sve2 - local long_diag short_symbol short_diag - local result prev_bmk - echo "bmk,symbol,result" > ${rr[top_artifacts]}/results-vs-prev/compare-results.csv - printf "extra_build_params=" > $run_step_artifacts/extra-bisect-params - - # Read result lines from <(tail -n +2 ...) below. - # "-n +2" is to skip the header line. - prev_bmk="" - while IFS=, read -a arr; do - bmk=${arr[0]} - symbol=${arr[1]} - rtime=${arr[2]} - rsize=${arr[3]} - rvect=${arr[4]} - rsve=${arr[5]} - # $arr[6] is used to store rel_symbol_md5sum, ignore it. - time1=${arr[7]} - time2=${arr[8]} - size1=${arr[9]} - size2=${arr[10]} - vect1=${arr[11]} - vect2=${arr[12]} - sve1=${arr[13]} - sve2=${arr[14]} - md5sum1=${arr[15]} - md5sum2=${arr[16]} - - case $metric_id in - size) metric=$rsize ;; - time) metric=$rtime ;; - vect) metric=$rvect ;; - sve) metric=$rsve ;; - *) assert false ;; - esac - - # We don't want to track regressions on Mean benchmark. Mean variations are - # interesting, but we should never bisect. Don't go furhter in this loop. - if [ "$bmk" == "Mean" ]; then - continue - fi - - # Skip case where we have no info ("n/a") - if [ "$metric" != "n/a" ]; then - metric=$(($metric - 100)) - # For vect or sve metric, relative value < 100 will be a regression. - # So effectively for any metric, if $metric is positive, - # then it's a regression. - if [ x"$metric_id" = x"vect" ] || [ x"$metric_id" = x"sve" ]; then - metric=$((-$metric)) - fi - # Remove padding from the tail of $symbol (padding is added by - # csvs2table.py for better formatting). 
- short_symbol="$(echo "$symbol" | sed -e "s/ *\$//")" - - local bmk_exe - case "$short_symbol" in - "["*) bmk_exe=false ;; - *"_base.default") bmk_exe=true ;; - *) bmk_exe=false ;; - esac - - local threshold neg_threshold - if $bmk_exe; then - threshold=$exe_threshold - neg_threshold=$((-exe_threshold)) - else - threshold=$symbol_threshold - neg_threshold=$((-symbol_threshold)) - fi - - if [ "$metric" -gt "$threshold" ] || [ "$metric" -lt "$neg_threshold" ]; then - if [ "$metric" -gt "$threshold" ]; then - change_kind="regression" - changed_by=$regressed_by - else - change_kind="improvement" - changed_by=$improved_by - fi - - # There's a regression - result=100 - - case $metric_id in - size) - short_diag="$changed_by ${metric}%" - long_diag="$short_diag from $size1 to $size2 bytes" - ;; - time) - short_diag="$changed_by ${metric}%" - long_diag="$short_diag from $time1 to $time2 perf samples" ;; - vect) - short_diag="$changed_by ${metric}%" - long_diag="$short_diag from $vect1 to $vect2" ;; - sve) - short_diag="$changed_by ${metric}%" - long_diag="$short_diag from $sve1 to $sve2" ;; - *) assert false ;; - esac - - if $bmk_exe; then - short_diag="$bmk $short_diag" - long_diag="$bmk $long_diag" - # Detect magic sample counts that indicate failure to build - # and failure to run - case "$time1:$time2" in - 888888888:888888888|999999999:999999999) - # Should never happen as we have neither - # an improvement nor a regression. 
- assert false - ;; - *:999999999) - change_kind="regression" - short_diag="$bmk failed to build" - long_diag="$short_diag" - ;; - 999999999:888888888) - change_kind="improvement" - short_diag="$bmk built OK, but failed to run" - long_diag="$short_diag" - ;; - *:888888888) - change_kind="regression" - short_diag="$bmk failed to run" - long_diag="$short_diag" - ;; - 888888888:*) - change_kind="improvement" - short_diag="$bmk run OK" - long_diag="$short_diag" - ;; - 999999999:*) - change_kind="improvement" - short_diag="$bmk built and run OK" - long_diag="$short_diag" - ;; - esac - echo "$metric,$bmk,$symbol,$short_diag,$long_diag" >> $run_step_artifacts/exe.$change_kind - else - # Skip reporting a symbol if it's md5sums match but time regression in exe - # is below the threshold. The rationale for doing this is that an isolated - # perf regression in a symbol might be more likely due to noise or caching effects - # that may not be directly attributable to code-gen, and would need further investigation. - if [ x"$md5sum1" == x"$md5sum2" ] \ - && [ x"$md5sum1" != x"-1" ] \ - && [ x"$md5sum1" != x"d41d8cd98f00b204e9800998ecf8427e" ] \ - && [ x"$metric_id" == x"time" ] \ - && [ ! 
-f $run_step_artifacts/exe.regression ]; then - echo "SKIPPING $bmk:$symbol because md5sums match but differ in samples by $metric" >> $run_step_artifacts/$bmk.symbols-skipped - else - short_diag="$bmk:$short_symbol $short_diag" - long_diag="$bmk:$short_symbol $long_diag" - echo "$metric,$bmk,$symbol,$short_diag,$long_diag" >> $run_step_artifacts/$bmk.symbols-$change_kind - fi - fi - if [ x"$bmk" != x"$prev_bmk" ]; then - printf "++benchmarks %s " $bmk >> $run_step_artifacts/extra-bisect-params - prev_bmk="$bmk" - fi - else - result=1 - fi - echo "$bmk,$symbol,$result" >> ${rr[top_artifacts]}/results-vs-prev/compare-results.csv - fi - # Read from results-internal.csv instead of results.csv, because - # the latter may contain commas in demangled symbol names, which will - # interfere with parsing. - done < <(tail -n +2 ${rr[top_artifacts]}/results-vs-prev/results-internal.csv) - - printf "\n" >> $run_step_artifacts/extra-bisect-params - - # aggregate all *.symbol-regression into a file symbol.regression - find $run_step_artifacts/ -name "*.symbols-regression" -print0 | xargs -0 cat \ - | sort -gr -o $run_step_artifacts/symbol.regression - if [ x"$(cat $run_step_artifacts/symbol.regression)" = x"" ]; then - # Delete empty file - rm $run_step_artifacts/symbol.regression - fi - - # check if both "output-bmk-results.py" and "tcwg_bmk-build.sh" results are identical - rm -f $run_step_artifacts/output-bmk-results/diff-bmk-results.out - for resfile in {exe,symbol}.{regression,improvement}; do - $scripts/../bmk-scripts/diff-bmk-results.py $run_step_artifacts/$resfile \ - "$run_step_artifacts/output-bmk-results/$resfile" >> \ - "$run_step_artifacts/output-bmk-results/diff-bmk-results.out" || true - done + "$run_step_artifacts"/ \ + "${rr[mode]}" $verbose_opt || touch "$run_step_artifacts"/output-bmk-results-error_code-$? # copy inputs useful to build the mail / jira / .. 
to mail dir - for resfile in $run_step_artifacts/{exe,symbol}.{regression,improvement} \ - $run_step_artifacts/*.symbols-regression; do + for resfile in $run_step_artifacts/{exe,symbol}.{regression,improvement}; do if [ -f $resfile ]; then cp $resfile ${rr[top_artifacts]}/notify/ - if [[ "$resfile" =~ regression$ ]]; then - cat $resfile | cut -d, -f3,5 | sed -e "s/^/# /" \ - >> $run_step_artifacts/results.regressions - fi fi done ) |