summaryrefslogtreecommitdiff
path: root/tcwg_bmk-build.sh
diff options
context:
space:
mode:
authorLaurent Alfonsi <laurent.alfonsi@linaro.org>2023-05-30 13:54:38 +0200
committerLaurent Alfonsi <laurent.alfonsi@linaro.org>2023-06-14 10:08:52 +0000
commit4fe33ced0edd10c418f8fa42a41f1cb5f78b1d43 (patch)
treed727ba33cf0989efeb9be26473c5dcab27218a65 /tcwg_bmk-build.sh
parentb9163889304480d01479518b825605da6bdea0ea (diff)
tcwg_bmk-build.sh: Use output-bmk-results output for bmk
Detailed changes: 1) output-bmk-results now outputs directly into $run_step_artifacts; the {exe,symbol}.{regression,improvement} and extra-bisect-params files are all generated by output-bmk-results.py. 2) The $bmk.symbols-regression/improvement files don't exist anymore; all details are now in symbol.regression/improvement. 3) $bmk.symbols-skipped doesn't exist anymore; all skipped exe/symbols are now in any.skipped. 4) results-vs-prev/compare-results.csv is not generated anymore, as it seemed to be useless. Change-Id: I4ef03ae52d97e42b14def96f1cc967d5ebf29123
Diffstat (limited to 'tcwg_bmk-build.sh')
-rwxr-xr-xtcwg_bmk-build.sh250
1 files changed, 4 insertions, 246 deletions
diff --git a/tcwg_bmk-build.sh b/tcwg_bmk-build.sh
index 295df6a7..bbe76f7c 100755
--- a/tcwg_bmk-build.sh
+++ b/tcwg_bmk-build.sh
@@ -426,6 +426,7 @@ compare_results ()
local -a compare_results_list=()
tmpf=$(mktemp)
+ # FIXME:
# To deal with some differences along base-artifacts recent history
# - remove 'Failed for column' message from csv file
# - skip empty csv files.
@@ -445,263 +446,20 @@ compare_results ()
rm -rf "${history_csvs[0]}" "$tmpf"
- # Temporarily store results of output-bmk-results.py into
- # $run_step_artifacts/output-bmk-results. The intent is to monitor
- # the results for few bmk runs and eventually replace comparison logic
- # in this function.
-
local verbose_opt=""
if $verbose; then
verbose_opt="verbose"
fi
- mkdir -p "$run_step_artifacts"/output-bmk-results
$scripts/../bmk-scripts/output-bmk-results.py \
${rr[top_artifacts]}/results-vs-prev/compare-results-internal.csv \
$run_step_artifacts/bmk-specific-variability.csv \
- "$run_step_artifacts"/output-bmk-results \
- "${rr[mode]}" $verbose_opt || touch "$run_step_artifacts"/output-bmk-results/error_code-$?
-
- case "${rr[ci_project]}" in
- *-code_vect-*) metric_id="vect" ;;
- *-code_sve-*) metric_id="sve" ;;
- *-code_size-*|*-fujitsu_size-*) metric_id="size" ;;
- *-code_speed-*|*-fujitsu_speed-*) metric_id="time" ;;
- esac
-
- local regressed_by improved_by changed_by exe_threshold symbol_threshold
-
- case $metric_id in
- size)
- # We use 1% tolerance for binary size
- # and 10% tolerance for symbol size.
- exe_threshold=1
- symbol_threshold=10
- regressed_by="grew in size by"
- improved_by="reduced in size by"
- ;;
- time)
- # We use 3% tolerance for binary speed
- # and 15% tolerance for symbol speed.
- exe_threshold=3
- symbol_threshold=15
-
- # Reduce thresholds when bisecting to avoid considering borderline
- # regressions as spurious. This should break cycles of build and
- # bisect jobs triggering each other on borderline regressions.
- if [ x"${rr[mode]}" = x"bisect" ]; then
- exe_threshold=2
- symbol_threshold=10
- fi
-
- regressed_by="slowed down by"
- improved_by="speeds up by"
- ;;
- vect|sve)
- exe_threshold=0
- symbol_threshold=0
- regressed_by="reduced by"
- improved_by="increased up by"
- ;;
- *) assert false ;;
- esac
-
- local -a arr
- local metric bmk symbol rtime rsize rvect rsve time1 time2 size1 size2 vect1 vect2 sve1 sve2
- local long_diag short_symbol short_diag
- local result prev_bmk
- echo "bmk,symbol,result" > ${rr[top_artifacts]}/results-vs-prev/compare-results.csv
- printf "extra_build_params=" > $run_step_artifacts/extra-bisect-params
-
- # Read result lines from <(tail -n +2 ...) below.
- # "-n +2" is to skip the header line.
- prev_bmk=""
- while IFS=, read -a arr; do
- bmk=${arr[0]}
- symbol=${arr[1]}
- rtime=${arr[2]}
- rsize=${arr[3]}
- rvect=${arr[4]}
- rsve=${arr[5]}
- # $arr[6] is used to store rel_symbol_md5sum, ignore it.
- time1=${arr[7]}
- time2=${arr[8]}
- size1=${arr[9]}
- size2=${arr[10]}
- vect1=${arr[11]}
- vect2=${arr[12]}
- sve1=${arr[13]}
- sve2=${arr[14]}
- md5sum1=${arr[15]}
- md5sum2=${arr[16]}
-
- case $metric_id in
- size) metric=$rsize ;;
- time) metric=$rtime ;;
- vect) metric=$rvect ;;
- sve) metric=$rsve ;;
- *) assert false ;;
- esac
-
- # We don't want to track regressions on Mean benchmark. Mean variations are
- # interesting, but we should never bisect. Don't go furhter in this loop.
- if [ "$bmk" == "Mean" ]; then
- continue
- fi
-
- # Skip case where we have no info ("n/a")
- if [ "$metric" != "n/a" ]; then
- metric=$(($metric - 100))
- # For vect or sve metric, relative value < 100 will be a regression.
- # So effectively for any metric, if $metric is positive,
- # then it's a regression.
- if [ x"$metric_id" = x"vect" ] || [ x"$metric_id" = x"sve" ]; then
- metric=$((-$metric))
- fi
- # Remove padding from the tail of $symbol (padding is added by
- # csvs2table.py for better formatting).
- short_symbol="$(echo "$symbol" | sed -e "s/ *\$//")"
-
- local bmk_exe
- case "$short_symbol" in
- "["*) bmk_exe=false ;;
- *"_base.default") bmk_exe=true ;;
- *) bmk_exe=false ;;
- esac
-
- local threshold neg_threshold
- if $bmk_exe; then
- threshold=$exe_threshold
- neg_threshold=$((-exe_threshold))
- else
- threshold=$symbol_threshold
- neg_threshold=$((-symbol_threshold))
- fi
-
- if [ "$metric" -gt "$threshold" ] || [ "$metric" -lt "$neg_threshold" ]; then
- if [ "$metric" -gt "$threshold" ]; then
- change_kind="regression"
- changed_by=$regressed_by
- else
- change_kind="improvement"
- changed_by=$improved_by
- fi
-
- # There's a regression
- result=100
-
- case $metric_id in
- size)
- short_diag="$changed_by ${metric}%"
- long_diag="$short_diag from $size1 to $size2 bytes"
- ;;
- time)
- short_diag="$changed_by ${metric}%"
- long_diag="$short_diag from $time1 to $time2 perf samples" ;;
- vect)
- short_diag="$changed_by ${metric}%"
- long_diag="$short_diag from $vect1 to $vect2" ;;
- sve)
- short_diag="$changed_by ${metric}%"
- long_diag="$short_diag from $sve1 to $sve2" ;;
- *) assert false ;;
- esac
-
- if $bmk_exe; then
- short_diag="$bmk $short_diag"
- long_diag="$bmk $long_diag"
- # Detect magic sample counts that indicate failure to build
- # and failure to run
- case "$time1:$time2" in
- 888888888:888888888|999999999:999999999)
- # Should never happen as we have neither
- # an improvement nor a regression.
- assert false
- ;;
- *:999999999)
- change_kind="regression"
- short_diag="$bmk failed to build"
- long_diag="$short_diag"
- ;;
- 999999999:888888888)
- change_kind="improvement"
- short_diag="$bmk built OK, but failed to run"
- long_diag="$short_diag"
- ;;
- *:888888888)
- change_kind="regression"
- short_diag="$bmk failed to run"
- long_diag="$short_diag"
- ;;
- 888888888:*)
- change_kind="improvement"
- short_diag="$bmk run OK"
- long_diag="$short_diag"
- ;;
- 999999999:*)
- change_kind="improvement"
- short_diag="$bmk built and run OK"
- long_diag="$short_diag"
- ;;
- esac
- echo "$metric,$bmk,$symbol,$short_diag,$long_diag" >> $run_step_artifacts/exe.$change_kind
- else
- # Skip reporting a symbol if it's md5sums match but time regression in exe
- # is below the threshold. The rationale for doing this is that an isolated
- # perf regression in a symbol might be more likely due to noise or caching effects
- # that may not be directly attributable to code-gen, and would need further investigation.
- if [ x"$md5sum1" == x"$md5sum2" ] \
- && [ x"$md5sum1" != x"-1" ] \
- && [ x"$md5sum1" != x"d41d8cd98f00b204e9800998ecf8427e" ] \
- && [ x"$metric_id" == x"time" ] \
- && [ ! -f $run_step_artifacts/exe.regression ]; then
- echo "SKIPPING $bmk:$symbol because md5sums match but differ in samples by $metric" >> $run_step_artifacts/$bmk.symbols-skipped
- else
- short_diag="$bmk:$short_symbol $short_diag"
- long_diag="$bmk:$short_symbol $long_diag"
- echo "$metric,$bmk,$symbol,$short_diag,$long_diag" >> $run_step_artifacts/$bmk.symbols-$change_kind
- fi
- fi
- if [ x"$bmk" != x"$prev_bmk" ]; then
- printf "++benchmarks %s " $bmk >> $run_step_artifacts/extra-bisect-params
- prev_bmk="$bmk"
- fi
- else
- result=1
- fi
- echo "$bmk,$symbol,$result" >> ${rr[top_artifacts]}/results-vs-prev/compare-results.csv
- fi
- # Read from results-internal.csv instead of results.csv, because
- # the latter may contain commas in demangled symbol names, which will
- # interfere with parsing.
- done < <(tail -n +2 ${rr[top_artifacts]}/results-vs-prev/results-internal.csv)
-
- printf "\n" >> $run_step_artifacts/extra-bisect-params
-
- # aggregate all *.symbol-regression into a file symbol.regression
- find $run_step_artifacts/ -name "*.symbols-regression" -print0 | xargs -0 cat \
- | sort -gr -o $run_step_artifacts/symbol.regression
- if [ x"$(cat $run_step_artifacts/symbol.regression)" = x"" ]; then
- # Delete empty file
- rm $run_step_artifacts/symbol.regression
- fi
-
- # check if both "output-bmk-results.py" and "tcwg_bmk-build.sh" results are identical
- rm -f $run_step_artifacts/output-bmk-results/diff-bmk-results.out
- for resfile in {exe,symbol}.{regression,improvement}; do
- $scripts/../bmk-scripts/diff-bmk-results.py $run_step_artifacts/$resfile \
- "$run_step_artifacts/output-bmk-results/$resfile" >> \
- "$run_step_artifacts/output-bmk-results/diff-bmk-results.out" || true
- done
+ "$run_step_artifacts"/ \
+ "${rr[mode]}" $verbose_opt || touch "$run_step_artifacts"/output-bmk-results-error_code-$?
# copy inputs useful to build the mail / jira / .. to mail dir
- for resfile in $run_step_artifacts/{exe,symbol}.{regression,improvement} \
- $run_step_artifacts/*.symbols-regression; do
+ for resfile in $run_step_artifacts/{exe,symbol}.{regression,improvement}; do
if [ -f $resfile ]; then
cp $resfile ${rr[top_artifacts]}/notify/
- if [[ "$resfile" =~ regression$ ]]; then
- cat $resfile | cut -d, -f3,5 | sed -e "s/^/# /" \
- >> $run_step_artifacts/results.regressions
- fi
fi
done
)