9 files changed, 77 insertions, 123 deletions
diff --git a/notify/any.skipped b/notify/any.skipped
deleted file mode 100644
index fa1ba6a..0000000
--- a/notify/any.skipped
+++ /dev/null
@@ -1 +0,0 @@
-541.leela_r,leela_r_base.default,541.leela_r run OK,541.leela_r run OK
diff --git a/notify/exe.regression b/notify/exe.regression
new file mode 100644
index 0000000..501f9d5
--- /dev/null
+++ b/notify/exe.regression
@@ -0,0 +1 @@
+-1,541.leela_r,leela_r_base.default,541.leela_r failed to run,541.leela_r failed to run
diff --git a/notify/extra-bisect-params b/notify/extra-bisect-params
index fa6c7c9..f144c18 100644
--- a/notify/extra-bisect-params
+++ b/notify/extra-bisect-params
@@ -1 +1 @@
-extra_build_params=
+extra_build_params=++benchmarks 541.leela_r 
diff --git a/notify/jira/comment-template.txt b/notify/jira/comment-template.txt
index 6e02c0a..f4dc34f 100644
--- a/notify/jira/comment-template.txt
+++ b/notify/jira/comment-template.txt
@@ -1,3 +1,3 @@
 [LLVM-651]
-No change
-Details: https://ci.linaro.org/job/tcwg_bmk-code_speed-cpu2017rate--llvm-arm-master-O2-build/71/artifact/artifacts/notify/mail-body.txt/*view*/
+541.leela_r failed to run
+Details: https://ci.linaro.org/job/tcwg_bmk-code_speed-cpu2017rate--llvm-arm-master-O2-build/73/artifact/artifacts/notify/mail-body.txt/*view*/
diff --git a/notify/mail-body.txt b/notify/mail-body.txt
index 62a03c8..a2b43c5 100644
--- a/notify/mail-body.txt
+++ b/notify/mail-body.txt
@@ -2,15 +2,10 @@ Dear contributor, our automatic CI has detected problems related to your patch(e
 
 In CI config tcwg_bmk-code_speed-cpu2017rate/llvm-arm-master-O2 after:
 
-  | 629 commits in llvm
-  | a09e32e5fe13 [InstSimplify] Respect UseInstrInfo in more folds
-  | 78c7201d6835 [NewGVN] Regenerate test checks (NFC)
-  | 1d3e38bfb763 [NewGVN] Add test for #53218 (NFC)
-  | e158add121dd [InstCombine] Canonicalize `icmp eq/ne (A ^ C), B` to `icmp eq/ne (A ^ B), C` (#67273)
-  | 31631d307fe0 [X86][FP16] Add missing handling for FP16 constrained cmp intrinsics (#67400)
-  | ... and 624 more commits in llvm
+  | baseline build
 
-No change
+the following benchmarks slowed down by more than 3%:
+- 541.leela_r failed to run
 
 Below reproducer instructions can be used to re-build both "first_bad" and "last_good" cross-toolchains used in this bisection.  Naturally, the scripts will fail when triggerring benchmarking jobs if you don\'t have access to Linaro TCWG CI.
 
@@ -27,6 +22,6 @@ This benchmarking CI is work-in-progress, and we welcome feedback and suggestion
 -----------------8<--------------------------8<--------------------------8<--------------------------
 The information below can be used to reproduce a debug environment:
 
-Current build   : https://ci.linaro.org/job/tcwg_bmk-code_speed-cpu2017rate--llvm-arm-master-O2-build/71/artifact/artifacts
-Reference build : https://ci.linaro.org/job/tcwg_bmk-code_speed-cpu2017rate--llvm-arm-master-O2-build/70/artifact/artifacts
+Current build   : https://ci.linaro.org/job/tcwg_bmk-code_speed-cpu2017rate--llvm-arm-master-O2-build/73/artifact/artifacts
+Reference build : https://ci.linaro.org/job/tcwg_bmk-code_speed-cpu2017rate--llvm-arm-master-O2-build/71/artifact/artifacts
 
diff --git a/notify/mail-recipients.txt b/notify/mail-recipients.txt
index 56b65bb..aa219ef 100644
--- a/notify/mail-recipients.txt
+++ b/notify/mail-recipients.txt
@@ -1 +1 @@
-bcc:tcwg-validation@linaro.org,maxim.kuvyrkov@linaro.org
+bcc:tcwg-validation@linaro.org
diff --git a/notify/mail-subject.txt b/notify/mail-subject.txt
index 652d9d8..18c8226 100644
--- a/notify/mail-subject.txt
+++ b/notify/mail-subject.txt
@@ -1 +1 @@
-[Linaro-TCWG-CI] 629 commits in llvm: No change
+[Linaro-TCWG-CI] baseline build: 541.leela_r failed to run
diff --git a/notify/output-bmk-results.log b/notify/output-bmk-results.log
index aa6bfe1..5d15716 100644
--- a/notify/output-bmk-results.log
+++ b/notify/output-bmk-results.log
@@ -291,9 +291,7 @@ output-bmk-results.py(99):        spec_thr = get_specific_thresholds(metric, mod
 output-bmk-results.py(57):     if specific_variability is None:
 output-bmk-results.py(60):     var = specific_variability[ (specific_variability['benchmark'] == bmk) & (specific_variability['symbol'].str.strip() == symb)]
 output-bmk-results.py(61):     if var.empty:
-output-bmk-results.py(63):     elif len(var)>1:
-output-bmk-results.py(68):     if var.iloc[0]['sample_variation_average']>0 :
-output-bmk-results.py(83):     return np.nan
+output-bmk-results.py(62):         return np.nan
 output-bmk-results.py(100):        if not np.isnan(spec_thr):
 output-bmk-results.py(107):     if metric == "num_vect_loops" or metric == "num_sve_loops":
 output-bmk-results.py(110):     return default_threshold[(change_kind,metric,mode)]
@@ -371,7 +369,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking exe.regression : 531.deepsjeng_r,deepsjeng_r_base.default : sample=1% (threshold=3%)
+DEBUG: checking exe.regression : 531.deepsjeng_r,deepsjeng_r_base.default : sample=0% (threshold=3.12%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_regression
 output-bmk-results.py(175):     if metric in metric_utils.higher_regress_metrics:
@@ -386,19 +384,49 @@ output-bmk-results.py(99):        spec_thr = get_specific_thresholds(metric, mod
 output-bmk-results.py(57):     if specific_variability is None:
 output-bmk-results.py(60):     var = specific_variability[ (specific_variability['benchmark'] == bmk) & (specific_variability['symbol'].str.strip() == symb)]
 output-bmk-results.py(61):     if var.empty:
-output-bmk-results.py(62):         return np.nan
+output-bmk-results.py(63):     elif len(var)>1:
+output-bmk-results.py(68):     if var.iloc[0]['sample_variation_average']>0 :
+output-bmk-results.py(83):     return np.nan
 output-bmk-results.py(100):        if not np.isnan(spec_thr):
 output-bmk-results.py(107):     if metric == "num_vect_loops" or metric == "num_sve_loops":
 output-bmk-results.py(110):     return default_threshold[(change_kind,metric,mode)]
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking exe.regression : 541.leela_r,leela_r_base.default : sample=100% (threshold=3%)
+DEBUG: checking exe.regression : 541.leela_r,leela_r_base.default : sample=-2147483547% (threshold=3%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_regression
 output-bmk-results.py(175):     if metric in metric_utils.higher_regress_metrics:
 output-bmk-results.py(176):         return (result - 100 > threshold)
-output-bmk-results.py(225):             continue
+output-bmk-results.py(227):         percent_change, short_diag, long_diag = get_short_long_diag(row, metric, sym_type, change_kind)
+ --- modulename: output-bmk-results, funcname: get_short_long_diag
+output-bmk-results.py(113):     bmk = row["benchmark"]
+output-bmk-results.py(114):     rel_value = row["rel_" + metric]
+output-bmk-results.py(115):     prev_value = row[metric + "_x"]
+output-bmk-results.py(116):     curr_value = row[metric + "_y"]
+output-bmk-results.py(118):     if metric == "sample":
+output-bmk-results.py(119):         if curr_value == 999999999:
+output-bmk-results.py(122):         elif curr_value == 888888888:
+output-bmk-results.py(123):             short_diag = "{0} failed to run".format(bmk)
+output-bmk-results.py(124):             return -1, short_diag, short_diag
+output-bmk-results.py(231):         if metric == "sample" \
+output-bmk-results.py(232):           and row['symbol_md5sum_x'] == row['symbol_md5sum_y'] \
+output-bmk-results.py(233):           and row['symbol_md5sum_x'] != "-1" \
+output-bmk-results.py(238):         print("DEBUG: *** {0},{1} : {2}".format(row["benchmark"], row["symbol"], long_diag))
+DEBUG: *** 541.leela_r,leela_r_base.default : 541.leela_r failed to run
+output-bmk-results.py(240):         f_out.write_csv((percent_change, row["benchmark"], row["symbol"], short_diag, long_diag))
+ --- modulename: output-bmk-results, funcname: write_csv
+output-bmk-results.py(41):     if not self.predicate or not self.csvwriter:
+output-bmk-results.py(43):     self.csvwriter.writerow(arr)
+output-bmk-results.py(241):         if change_kind == "regression":
+output-bmk-results.py(242):             f_regr.write("# {0},{1}\n".format(row["symbol"], long_diag))
+ --- modulename: output-bmk-results, funcname: write
+output-bmk-results.py(36):     if not self.predicate or not self.outf:
+output-bmk-results.py(38):     self.outf.write(string)
+output-bmk-results.py(243):             f_ebp.write("++benchmarks {0} ".format(row["benchmark"]))
+ --- modulename: output-bmk-results, funcname: write
+output-bmk-results.py(36):     if not self.predicate or not self.outf:
+output-bmk-results.py(38):     self.outf.write(string)
 output-bmk-results.py(216):     for index, row in out_df.iterrows():
 output-bmk-results.py(218):         threshold = get_threshold(sym_type, metric, mode, row["benchmark"], row["symbol"])
  --- modulename: output-bmk-results, funcname: get_threshold
@@ -454,7 +482,6 @@ output-bmk-results.py(245):     f_out.close()
 output-bmk-results.py(29):     if not self.outf:
 output-bmk-results.py(31):     self.outf.close()
 output-bmk-results.py(32):     if os.stat(self.filename).st_size == 0:
-output-bmk-results.py(33):        os.remove(self.filename)
 output-bmk-results.py(263):     output_bmk_results_1(exe_df, "exe", "improvement", None, f_skip, None, run_step_artifacts, metric, mode, details)
  --- modulename: output-bmk-results, funcname: output_bmk_results_1
 output-bmk-results.py(210):     f_out = Outfile("{0}/{1}.{2}".format(run_step_artifacts, sym_type, change_kind), "w", predicate=(details=="verbose"))
@@ -658,9 +685,7 @@ output-bmk-results.py(99):        spec_thr = get_specific_thresholds(metric, mod
 output-bmk-results.py(57):     if specific_variability is None:
 output-bmk-results.py(60):     var = specific_variability[ (specific_variability['benchmark'] == bmk) & (specific_variability['symbol'].str.strip() == symb)]
 output-bmk-results.py(61):     if var.empty:
-output-bmk-results.py(63):     elif len(var)>1:
-output-bmk-results.py(68):     if var.iloc[0]['sample_variation_average']>0 :
-output-bmk-results.py(83):     return np.nan
+output-bmk-results.py(62):         return np.nan
 output-bmk-results.py(100):        if not np.isnan(spec_thr):
 output-bmk-results.py(107):     if metric == "num_vect_loops" or metric == "num_sve_loops":
 output-bmk-results.py(110):     return default_threshold[(change_kind,metric,mode)]
@@ -738,7 +763,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking exe.improvement : 531.deepsjeng_r,deepsjeng_r_base.default : sample=1% (threshold=3%)
+DEBUG: checking exe.improvement : 531.deepsjeng_r,deepsjeng_r_base.default : sample=0% (threshold=3.12%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_improvement
 output-bmk-results.py(184):     if metric in metric_utils.higher_regress_metrics:
@@ -753,40 +778,21 @@ output-bmk-results.py(99):        spec_thr = get_specific_thresholds(metric, mod
 output-bmk-results.py(57):     if specific_variability is None:
 output-bmk-results.py(60):     var = specific_variability[ (specific_variability['benchmark'] == bmk) & (specific_variability['symbol'].str.strip() == symb)]
 output-bmk-results.py(61):     if var.empty:
-output-bmk-results.py(62):         return np.nan
+output-bmk-results.py(63):     elif len(var)>1:
+output-bmk-results.py(68):     if var.iloc[0]['sample_variation_average']>0 :
+output-bmk-results.py(83):     return np.nan
 output-bmk-results.py(100):        if not np.isnan(spec_thr):
 output-bmk-results.py(107):     if metric == "num_vect_loops" or metric == "num_sve_loops":
 output-bmk-results.py(110):     return default_threshold[(change_kind,metric,mode)]
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking exe.improvement : 541.leela_r,leela_r_base.default : sample=100% (threshold=3%)
+DEBUG: checking exe.improvement : 541.leela_r,leela_r_base.default : sample=-2147483547% (threshold=3%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_improvement
 output-bmk-results.py(184):     if metric in metric_utils.higher_regress_metrics:
 output-bmk-results.py(185):         return (100 - result > threshold)
-output-bmk-results.py(227):         percent_change, short_diag, long_diag = get_short_long_diag(row, metric, sym_type, change_kind)
- --- modulename: output-bmk-results, funcname: get_short_long_diag
-output-bmk-results.py(113):     bmk = row["benchmark"]
-output-bmk-results.py(114):     rel_value = row["rel_" + metric]
-output-bmk-results.py(115):     prev_value = row[metric + "_x"]
-output-bmk-results.py(116):     curr_value = row[metric + "_y"]
-output-bmk-results.py(118):     if metric == "sample":
-output-bmk-results.py(119):         if curr_value == 999999999:
-output-bmk-results.py(122):         elif curr_value == 888888888:
-output-bmk-results.py(125):         elif prev_value == 999999999 and curr_value == 888888888:
-output-bmk-results.py(128):         elif prev_value == 888888888 and curr_value < 888888888:
-output-bmk-results.py(129):             short_diag = "{0} run OK".format(bmk)
-output-bmk-results.py(130):             return -1, short_diag, short_diag
-output-bmk-results.py(231):         if metric == "sample" \
-output-bmk-results.py(232):           and row['symbol_md5sum_x'] == row['symbol_md5sum_y'] \
-output-bmk-results.py(233):           and row['symbol_md5sum_x'] != "-1" \
-output-bmk-results.py(234):           and row['symbol_md5sum_x'] != "d41d8cd98f00b204e9800998ecf8427e":
-output-bmk-results.py(235):             f_skip.write_csv((row["benchmark"], row["symbol"], short_diag, long_diag))
- --- modulename: output-bmk-results, funcname: write_csv
-output-bmk-results.py(41):     if not self.predicate or not self.csvwriter:
-output-bmk-results.py(43):     self.csvwriter.writerow(arr)
-output-bmk-results.py(236):             continue
+output-bmk-results.py(225):             continue
 output-bmk-results.py(216):     for index, row in out_df.iterrows():
 output-bmk-results.py(218):         threshold = get_threshold(sym_type, metric, mode, row["benchmark"], row["symbol"])
  --- modulename: output-bmk-results, funcname: get_threshold
@@ -875,7 +881,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.regression : 505.mcf_r,[.] primal_bea_mpp : sample=-1% (threshold=15%)
+DEBUG: checking symbol.regression : 505.mcf_r,[.] primal_bea_mpp : sample=1% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_regression
 output-bmk-results.py(175):     if metric in metric_utils.higher_regress_metrics:
@@ -902,7 +908,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.regression : 505.mcf_r,[.] price_out_impl : sample=-1% (threshold=15%)
+DEBUG: checking symbol.regression : 505.mcf_r,[.] price_out_impl : sample=0% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_regression
 output-bmk-results.py(175):     if metric in metric_utils.higher_regress_metrics:
@@ -929,7 +935,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.regression : 505.mcf_r,[.] cost_compare : sample=-2% (threshold=15%)
+DEBUG: checking symbol.regression : 505.mcf_r,[.] cost_compare : sample=-4% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_regression
 output-bmk-results.py(175):     if metric in metric_utils.higher_regress_metrics:
@@ -956,7 +962,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.regression : 505.mcf_r,[.] replace_weaker_arc : sample=1% (threshold=15%)
+DEBUG: checking symbol.regression : 505.mcf_r,[.] replace_weaker_arc : sample=0% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_regression
 output-bmk-results.py(175):     if metric in metric_utils.higher_regress_metrics:
@@ -983,7 +989,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.regression : 531.deepsjeng_r,[.] _Z5fevalP7state_tiP12t_eval_comps : sample=4% (threshold=15%)
+DEBUG: checking symbol.regression : 531.deepsjeng_r,[.] _Z5fevalP7state_tiP12t_eval_comps : sample=-6% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_regression
 output-bmk-results.py(175):     if metric in metric_utils.higher_regress_metrics:
@@ -1010,7 +1016,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.regression : 531.deepsjeng_r,[.] _Z7ProbeTTP7state_tPiiiPjS1_S1_S1_S1_i : sample=1% (threshold=15%)
+DEBUG: checking symbol.regression : 531.deepsjeng_r,[.] _Z7ProbeTTP7state_tPiiiPjS1_S1_S1_S1_i : sample=2% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_regression
 output-bmk-results.py(175):     if metric in metric_utils.higher_regress_metrics:
@@ -1037,7 +1043,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.regression : 531.deepsjeng_r,[.] _Z6searchP7state_tiiiii : sample=-9% (threshold=15%)
+DEBUG: checking symbol.regression : 531.deepsjeng_r,[.] _Z4makeP7state_ti : sample=0% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_regression
 output-bmk-results.py(175):     if metric in metric_utils.higher_regress_metrics:
@@ -1064,31 +1070,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.regression : 531.deepsjeng_r,[.] _Z4makeP7state_ti : sample=-5% (threshold=15.059999999999999%)
-output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
- --- modulename: output-bmk-results, funcname: is_entry_regression
-output-bmk-results.py(175):     if metric in metric_utils.higher_regress_metrics:
-output-bmk-results.py(176):         return (result - 100 > threshold)
-output-bmk-results.py(225):             continue
-output-bmk-results.py(216):     for index, row in out_df.iterrows():
-output-bmk-results.py(218):         threshold = get_threshold(sym_type, metric, mode, row["benchmark"], row["symbol"])
- --- modulename: output-bmk-results, funcname: get_threshold
-output-bmk-results.py(98):     if metric == "sample":
-output-bmk-results.py(99):        spec_thr = get_specific_thresholds(metric, mode, bmk, symb)
- --- modulename: output-bmk-results, funcname: get_specific_thresholds
-output-bmk-results.py(57):     if specific_variability is None:
-output-bmk-results.py(60):     var = specific_variability[ (specific_variability['benchmark'] == bmk) & (specific_variability['symbol'].str.strip() == symb)]
-output-bmk-results.py(61):     if var.empty:
-output-bmk-results.py(63):     elif len(var)>1:
-output-bmk-results.py(68):     if var.iloc[0]['sample_variation_average']>0 :
-output-bmk-results.py(83):     return np.nan
-output-bmk-results.py(100):        if not np.isnan(spec_thr):
-output-bmk-results.py(107):     if metric == "num_vect_loops" or metric == "num_sve_loops":
-output-bmk-results.py(110):     return default_threshold[(change_kind,metric,mode)]
-output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
-output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.regression : 531.deepsjeng_r,[.] _Z3seeP7state_tiiii : sample=2% (threshold=15%)
+DEBUG: checking symbol.regression : 531.deepsjeng_r,[.] _Z6searchP7state_tiiiii : sample=8% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_regression
 output-bmk-results.py(175):     if metric in metric_utils.higher_regress_metrics:
@@ -1115,7 +1097,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.regression : 557.xz_r,[.] lzma_mf_bt4_find : sample=-2% (threshold=15%)
+DEBUG: checking symbol.regression : 557.xz_r,[.] lzma_mf_bt4_find : sample=1% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_regression
 output-bmk-results.py(175):     if metric in metric_utils.higher_regress_metrics:
@@ -1142,7 +1124,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.regression : 557.xz_r,[.] lzma_lzma_optimum_normal : sample=3% (threshold=15%)
+DEBUG: checking symbol.regression : 557.xz_r,[.] lzma_lzma_optimum_normal : sample=-2% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_regression
 output-bmk-results.py(175):     if metric in metric_utils.higher_regress_metrics:
@@ -1169,7 +1151,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.regression : 557.xz_r,[.] lzma_mf_bt4_skip : sample=1% (threshold=15%)
+DEBUG: checking symbol.regression : 557.xz_r,[.] lzma_mf_bt4_skip : sample=-2% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_regression
 output-bmk-results.py(175):     if metric in metric_utils.higher_regress_metrics:
@@ -1214,7 +1196,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.improvement : 505.mcf_r,[.] primal_bea_mpp : sample=-1% (threshold=15%)
+DEBUG: checking symbol.improvement : 505.mcf_r,[.] primal_bea_mpp : sample=1% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_improvement
 output-bmk-results.py(184):     if metric in metric_utils.higher_regress_metrics:
@@ -1241,7 +1223,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.improvement : 505.mcf_r,[.] price_out_impl : sample=-1% (threshold=15%)
+DEBUG: checking symbol.improvement : 505.mcf_r,[.] price_out_impl : sample=0% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_improvement
 output-bmk-results.py(184):     if metric in metric_utils.higher_regress_metrics:
@@ -1268,7 +1250,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.improvement : 505.mcf_r,[.] cost_compare : sample=-2% (threshold=15%)
+DEBUG: checking symbol.improvement : 505.mcf_r,[.] cost_compare : sample=-4% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_improvement
 output-bmk-results.py(184):     if metric in metric_utils.higher_regress_metrics:
@@ -1295,7 +1277,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.improvement : 505.mcf_r,[.] replace_weaker_arc : sample=1% (threshold=15%)
+DEBUG: checking symbol.improvement : 505.mcf_r,[.] replace_weaker_arc : sample=0% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_improvement
 output-bmk-results.py(184):     if metric in metric_utils.higher_regress_metrics:
@@ -1322,7 +1304,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.improvement : 531.deepsjeng_r,[.] _Z5fevalP7state_tiP12t_eval_comps : sample=4% (threshold=15%)
+DEBUG: checking symbol.improvement : 531.deepsjeng_r,[.] _Z5fevalP7state_tiP12t_eval_comps : sample=-6% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_improvement
 output-bmk-results.py(184):     if metric in metric_utils.higher_regress_metrics:
@@ -1349,7 +1331,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.improvement : 531.deepsjeng_r,[.] _Z7ProbeTTP7state_tPiiiPjS1_S1_S1_S1_i : sample=1% (threshold=15%)
+DEBUG: checking symbol.improvement : 531.deepsjeng_r,[.] _Z7ProbeTTP7state_tPiiiPjS1_S1_S1_S1_i : sample=2% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_improvement
 output-bmk-results.py(184):     if metric in metric_utils.higher_regress_metrics:
@@ -1376,7 +1358,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.improvement : 531.deepsjeng_r,[.] _Z6searchP7state_tiiiii : sample=-9% (threshold=15%)
+DEBUG: checking symbol.improvement : 531.deepsjeng_r,[.] _Z4makeP7state_ti : sample=0% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_improvement
 output-bmk-results.py(184):     if metric in metric_utils.higher_regress_metrics:
@@ -1403,31 +1385,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.improvement : 531.deepsjeng_r,[.] _Z4makeP7state_ti : sample=-5% (threshold=15.059999999999999%)
-output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
- --- modulename: output-bmk-results, funcname: is_entry_improvement
-output-bmk-results.py(184):     if metric in metric_utils.higher_regress_metrics:
-output-bmk-results.py(185):         return (100 - result > threshold)
-output-bmk-results.py(225):             continue
-output-bmk-results.py(216):     for index, row in out_df.iterrows():
-output-bmk-results.py(218):         threshold = get_threshold(sym_type, metric, mode, row["benchmark"], row["symbol"])
- --- modulename: output-bmk-results, funcname: get_threshold
-output-bmk-results.py(98):     if metric == "sample":
-output-bmk-results.py(99):        spec_thr = get_specific_thresholds(metric, mode, bmk, symb)
- --- modulename: output-bmk-results, funcname: get_specific_thresholds
-output-bmk-results.py(57):     if specific_variability is None:
-output-bmk-results.py(60):     var = specific_variability[ (specific_variability['benchmark'] == bmk) & (specific_variability['symbol'].str.strip() == symb)]
-output-bmk-results.py(61):     if var.empty:
-output-bmk-results.py(63):     elif len(var)>1:
-output-bmk-results.py(68):     if var.iloc[0]['sample_variation_average']>0 :
-output-bmk-results.py(83):     return np.nan
-output-bmk-results.py(100):        if not np.isnan(spec_thr):
-output-bmk-results.py(107):     if metric == "num_vect_loops" or metric == "num_sve_loops":
-output-bmk-results.py(110):     return default_threshold[(change_kind,metric,mode)]
-output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
-output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.improvement : 531.deepsjeng_r,[.] _Z3seeP7state_tiiii : sample=2% (threshold=15%)
+DEBUG: checking symbol.improvement : 531.deepsjeng_r,[.] _Z6searchP7state_tiiiii : sample=8% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_improvement
 output-bmk-results.py(184):     if metric in metric_utils.higher_regress_metrics:
@@ -1454,7 +1412,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.improvement : 557.xz_r,[.] lzma_mf_bt4_find : sample=-2% (threshold=15%)
+DEBUG: checking symbol.improvement : 557.xz_r,[.] lzma_mf_bt4_find : sample=1% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_improvement
 output-bmk-results.py(184):     if metric in metric_utils.higher_regress_metrics:
@@ -1481,7 +1439,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.improvement : 557.xz_r,[.] lzma_lzma_optimum_normal : sample=3% (threshold=15%)
+DEBUG: checking symbol.improvement : 557.xz_r,[.] lzma_lzma_optimum_normal : sample=-2% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_improvement
 output-bmk-results.py(184):     if metric in metric_utils.higher_regress_metrics:
@@ -1508,7 +1466,7 @@ output-bmk-results.py(105):           return spec_thr
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
 output-bmk-results.py(221):              .format(sym_type, change_kind, row["benchmark"], row["symbol"], metric, 100-row["rel_" + metric], threshold))
 output-bmk-results.py(220):         print("DEBUG: checking {0}.{1} : {2},{3} : {4}={5}% (threshold={6}%)"\
-DEBUG: checking symbol.improvement : 557.xz_r,[.] lzma_mf_bt4_skip : sample=1% (threshold=15%)
+DEBUG: checking symbol.improvement : 557.xz_r,[.] lzma_mf_bt4_skip : sample=-2% (threshold=15%)
 output-bmk-results.py(224):         if not is_entry_xxx[change_kind](metric, row["rel_" + metric], threshold):
  --- modulename: output-bmk-results, funcname: is_entry_improvement
 output-bmk-results.py(184):     if metric in metric_utils.higher_regress_metrics:
@@ -1530,12 +1488,12 @@ output-bmk-results.py(270):     f_skip.close()
 output-bmk-results.py(29):     if not self.outf:
 output-bmk-results.py(31):     self.outf.close()
 output-bmk-results.py(32):     if os.stat(self.filename).st_size == 0:
+output-bmk-results.py(33):        os.remove(self.filename)
 output-bmk-results.py(271):     f_regr.close()
  --- modulename: output-bmk-results, funcname: close
 output-bmk-results.py(29):     if not self.outf:
 output-bmk-results.py(31):     self.outf.close()
 output-bmk-results.py(32):     if os.stat(self.filename).st_size == 0:
-output-bmk-results.py(33):        os.remove(self.filename)
 output-bmk-results.py(272):     f_ebp.close()
  --- modulename: output-bmk-results, funcname: close
 output-bmk-results.py(29):     if not self.outf:
diff --git a/notify/results.regressions b/notify/results.regressions
new file mode 100644
index 0000000..24275d8
--- /dev/null
+++ b/notify/results.regressions
@@ -0,0 +1 @@
+# leela_r_base.default,541.leela_r failed to run