tcwg_bmk-build.sh: Use output-bmk-results output for bmk

Detailed changes: 1) output-bmk-results.py now writes its output directly into $run_step_artifacts; the {exe,symbol}.{regression,improvement} and extra-bisect-params files are all generated by output-bmk-results.py. 2) The $bmk.symbols-regression/improvement files don't exist anymore; all details are now in symbol.regression/improvement. 3) $bmk.symbols-skipped doesn't exist anymore; all skipped exe/symbols are now in any.skipped. 4) results-vs-prev/compare-results.csv is not generated anymore, as it seemed to be useless. Change-Id: I4ef03ae52d97e42b14def96f1cc967d5ebf29123
author: Laurent Alfonsi <laurent.alfonsi@linaro.org> 2023-05-30 13:54:38 +0200
committer: Laurent Alfonsi <laurent.alfonsi@linaro.org> 2023-06-14 10:08:52 +0000
commit: 4fe33ced0edd10c418f8fa42a41f1cb5f78b1d43 (patch)
tree: d727ba33cf0989efeb9be26473c5dcab27218a65 /tcwg_bmk-build.sh
parent: b9163889304480d01479518b825605da6bdea0ea (diff)
1 files changed, 4 insertions, 246 deletions
diff --git a/tcwg_bmk-build.sh b/tcwg_bmk-build.sh
index 295df6a7..bbe76f7c 100755
--- a/tcwg_bmk-build.sh
+++ b/tcwg_bmk-build.sh
@@ -426,6 +426,7 @@ compare_results ()
     local -a compare_results_list=()
     tmpf=$(mktemp)
 
+    # FIXME:
     # To deal with some differences along base-artifacts recent history
     # - remove 'Failed for column' message from csv file
     # - skip emtpy csv files.
@@ -445,263 +446,20 @@ compare_results ()
 
     rm -rf "${history_csvs[0]}" "$tmpf"
 
-    # Temporarily store results of output-bmk-results.py into
-    # $run_step_artifacts/output-bmk-results. The intent is to monitor
-    # the results for few bmk runs and eventually replace comparison logic
-    # in this function.
-
     local verbose_opt=""
     if $verbose; then
 	verbose_opt="verbose"
     fi
-    mkdir -p "$run_step_artifacts"/output-bmk-results
     $scripts/../bmk-scripts/output-bmk-results.py \
 	${rr[top_artifacts]}/results-vs-prev/compare-results-internal.csv \
 	$run_step_artifacts/bmk-specific-variability.csv \
-	"$run_step_artifacts"/output-bmk-results \
-	"${rr[mode]}" $verbose_opt || touch "$run_step_artifacts"/output-bmk-results/error_code-$?
-
-    case "${rr[ci_project]}" in
-	*-code_vect-*) metric_id="vect" ;;
-	*-code_sve-*) metric_id="sve" ;;
-	*-code_size-*|*-fujitsu_size-*) metric_id="size" ;;
-	*-code_speed-*|*-fujitsu_speed-*) metric_id="time" ;;
-    esac
-
-    local regressed_by improved_by changed_by exe_threshold symbol_threshold
-
-    case $metric_id in
-	size)
-	    # We use 1% tolerance for binary size
-	    # and 10% tolerance for symbol size.
-	    exe_threshold=1
-	    symbol_threshold=10
-	    regressed_by="grew in size by"
-	    improved_by="reduced in size by"
-	    ;;
-	time)
-	    # We use 3% tolerance for binary speed
-	    # and 15% tolerance for symbol speed.
-	    exe_threshold=3
-	    symbol_threshold=15
-
-	    # Reduce thresholds when bisecting to avoid considering borderline
-	    # regressions as spurious.  This should break cycles of build and
-	    # bisect jobs triggering each other on borderline regressions.
-	    if [ x"${rr[mode]}" = x"bisect" ]; then
-		exe_threshold=2
-		symbol_threshold=10
-	    fi
-
-	    regressed_by="slowed down by"
-	    improved_by="speeds up by"
-	    ;;
-	vect|sve)
-	    exe_threshold=0
-	    symbol_threshold=0
-	    regressed_by="reduced by"
-	    improved_by="increased up by"
-	    ;;
-	*) assert false ;;
-    esac
-
-    local -a arr
-    local metric bmk symbol rtime rsize rvect rsve time1 time2 size1 size2 vect1 vect2 sve1 sve2
-    local long_diag short_symbol short_diag
-    local result prev_bmk
-    echo "bmk,symbol,result" > ${rr[top_artifacts]}/results-vs-prev/compare-results.csv
-    printf "extra_build_params=" > $run_step_artifacts/extra-bisect-params
-
-    # Read result lines from <(tail -n +2 ...) below.
-    # "-n +2" is to skip the header line.
-    prev_bmk=""
-    while IFS=, read -a arr; do
-	bmk=${arr[0]}
-	symbol=${arr[1]}
-	rtime=${arr[2]}
-	rsize=${arr[3]}
-	rvect=${arr[4]}
-	rsve=${arr[5]}
-	# $arr[6] is used to store rel_symbol_md5sum, ignore it.
-	time1=${arr[7]}
-	time2=${arr[8]}
-	size1=${arr[9]}
-	size2=${arr[10]}
-	vect1=${arr[11]}
-	vect2=${arr[12]}
-	sve1=${arr[13]}
-	sve2=${arr[14]}
-	md5sum1=${arr[15]}
-	md5sum2=${arr[16]}
-
-	case $metric_id in
-	    size) metric=$rsize ;;
-	    time) metric=$rtime ;;
-	    vect) metric=$rvect ;;
-	    sve) metric=$rsve ;;
-	    *) assert false ;;
-	esac
-
-	# We don't want to track regressions on Mean benchmark. Mean variations are
-	# interesting, but we should never bisect. Don't go furhter in this loop.
-	if [ "$bmk" == "Mean" ]; then
-	  continue
-	fi
-
-	# Skip case where we have no info ("n/a")
-	if [ "$metric" != "n/a" ]; then
-	    metric=$(($metric - 100))
-	    # For vect or sve metric, relative value < 100 will be a regression.
-	    # So effectively for any metric, if $metric is positive,
-	    # then it's a regression.
-	    if [ x"$metric_id" = x"vect" ] || [ x"$metric_id" = x"sve" ]; then
-		metric=$((-$metric))
-	    fi
-	    # Remove padding from the tail of $symbol (padding is added by
-	    # csvs2table.py for better formatting).
-	    short_symbol="$(echo "$symbol" | sed -e "s/ *\$//")"
-
-	    local bmk_exe
-	    case "$short_symbol" in
-		"["*) bmk_exe=false ;;
-		*"_base.default") bmk_exe=true ;;
-		*) bmk_exe=false ;;
-	    esac
-
-	    local threshold neg_threshold
-	    if $bmk_exe; then
-		threshold=$exe_threshold
-		neg_threshold=$((-exe_threshold))
-	    else
-		threshold=$symbol_threshold
-		neg_threshold=$((-symbol_threshold))
-	    fi
-
-	    if [ "$metric" -gt "$threshold" ] || [ "$metric" -lt "$neg_threshold" ]; then
-	        if [ "$metric" -gt "$threshold" ]; then
-		   change_kind="regression"
-		   changed_by=$regressed_by
-		else
-		   change_kind="improvement"
-		   changed_by=$improved_by
-		fi
-
-		# There's a regression
-		result=100
-
-		case $metric_id in
-		    size)
-			short_diag="$changed_by ${metric}%"
-			long_diag="$short_diag from $size1 to $size2 bytes"
-			;;
-		    time)
-			short_diag="$changed_by ${metric}%"
-			long_diag="$short_diag from $time1 to $time2 perf samples" ;;
-		    vect)
-			short_diag="$changed_by ${metric}%"
-			long_diag="$short_diag from $vect1 to $vect2" ;;
-		    sve)
-			short_diag="$changed_by ${metric}%"
-			long_diag="$short_diag from $sve1 to $sve2" ;;
-		    *) assert false ;;
-		esac
-
-		if $bmk_exe; then
-		    short_diag="$bmk $short_diag"
-		    long_diag="$bmk $long_diag"
-		    # Detect magic sample counts that indicate failure to build
-		    # and failure to run
-		    case "$time1:$time2" in
-			888888888:888888888|999999999:999999999)
-			    # Should never happen as we have neither
-			    # an improvement nor a regression.
-			    assert false
-			    ;;
-			*:999999999)
-			    change_kind="regression"
-			    short_diag="$bmk failed to build"
-			    long_diag="$short_diag"
-			    ;;
-			999999999:888888888)
-			    change_kind="improvement"
-			    short_diag="$bmk built OK, but failed to run"
-			    long_diag="$short_diag"
-			    ;;
-			*:888888888)
-			    change_kind="regression"
-			    short_diag="$bmk failed to run"
-			    long_diag="$short_diag"
-			    ;;
-			888888888:*)
-			    change_kind="improvement"
-			    short_diag="$bmk run OK"
-			    long_diag="$short_diag"
-			    ;;
-			999999999:*)
-			    change_kind="improvement"
-			    short_diag="$bmk built and run OK"
-			    long_diag="$short_diag"
-			    ;;
-		    esac
-		    echo "$metric,$bmk,$symbol,$short_diag,$long_diag" >> $run_step_artifacts/exe.$change_kind
-		else
-		    # Skip reporting a symbol if it's md5sums match but time regression in exe
-		    # is below the threshold. The rationale for doing this is that an isolated
-		    # perf regression in a symbol might be more likely due to noise or caching effects
-		    # that may not be directly attributable to code-gen, and would need further investigation.
-		    if [ x"$md5sum1" == x"$md5sum2" ] \
-		       && [ x"$md5sum1" != x"-1" ] \
-		       && [ x"$md5sum1" != x"d41d8cd98f00b204e9800998ecf8427e" ] \
-		       && [ x"$metric_id" == x"time" ] \
-		       && [ ! -f $run_step_artifacts/exe.regression ]; then
-			echo "SKIPPING $bmk:$symbol because md5sums match but differ in samples by $metric" >> $run_step_artifacts/$bmk.symbols-skipped
-		    else
-			short_diag="$bmk:$short_symbol $short_diag"
-			long_diag="$bmk:$short_symbol $long_diag"
-			echo "$metric,$bmk,$symbol,$short_diag,$long_diag" >> $run_step_artifacts/$bmk.symbols-$change_kind
-		    fi
-		fi
-		if [ x"$bmk" != x"$prev_bmk" ]; then
-		    printf "++benchmarks %s " $bmk >> $run_step_artifacts/extra-bisect-params
-		    prev_bmk="$bmk"
-		fi
-	    else
-		result=1
-	    fi
-	    echo "$bmk,$symbol,$result" >> ${rr[top_artifacts]}/results-vs-prev/compare-results.csv
-	fi
-    # Read from results-internal.csv instead of results.csv, because
-    # the latter may contain commas in demangled symbol names, which will
-    # interfere with parsing.
-    done < <(tail -n +2 ${rr[top_artifacts]}/results-vs-prev/results-internal.csv)
-
-    printf "\n" >> $run_step_artifacts/extra-bisect-params
-
-    # aggregate all *.symbol-regression into a file symbol.regression
-    find $run_step_artifacts/ -name "*.symbols-regression" -print0 | xargs -0 cat \
-	| sort -gr -o $run_step_artifacts/symbol.regression
-    if [ x"$(cat $run_step_artifacts/symbol.regression)" = x"" ]; then
-	# Delete empty file
-	rm $run_step_artifacts/symbol.regression
-    fi
-
-    # check if both "output-bmk-results.py" and "tcwg_bmk-build.sh" results are identical
-    rm -f $run_step_artifacts/output-bmk-results/diff-bmk-results.out
-    for resfile in {exe,symbol}.{regression,improvement}; do
-       $scripts/../bmk-scripts/diff-bmk-results.py $run_step_artifacts/$resfile \
-         "$run_step_artifacts/output-bmk-results/$resfile" >> \
-         "$run_step_artifacts/output-bmk-results/diff-bmk-results.out" || true
-    done
+	"$run_step_artifacts"/ \
+	"${rr[mode]}" $verbose_opt || touch "$run_step_artifacts"/output-bmk-results-error_code-$?
 
     # copy inputs useful to build the mail / jira / .. to mail dir
-    for resfile in $run_step_artifacts/{exe,symbol}.{regression,improvement} \
-                   $run_step_artifacts/*.symbols-regression; do
+    for resfile in $run_step_artifacts/{exe,symbol}.{regression,improvement}; do
         if [ -f $resfile ]; then
 	   cp $resfile ${rr[top_artifacts]}/notify/
-	   if [[ "$resfile" =~ regression$ ]]; then
-	      cat $resfile | cut -d, -f3,5 | sed -e "s/^/# /" \
-	         >> $run_step_artifacts/results.regressions
-	   fi
 	fi
     done
     )
author	Laurent Alfonsi <laurent.alfonsi@linaro.org>	2023-05-30 13:54:38 +0200
committer	Laurent Alfonsi <laurent.alfonsi@linaro.org>	2023-06-14 10:08:52 +0000
commit	4fe33ced0edd10c418f8fa42a41f1cb5f78b1d43 (patch)
tree	d727ba33cf0989efeb9be26473c5dcab27218a65 /tcwg_bmk-build.sh
parent	b9163889304480d01479518b825605da6bdea0ea (diff)