author    Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>  2024-03-21 20:57:10 +0530
committer Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>  2024-03-21 20:57:10 +0530
commit    5220b39df3f36e33912666633512fa04eab80b6d (patch)
tree      c5b2eb7b49124a819b962f7916edd069b0a32e85
parent    0f75b3101f2a46d477f17b3e69dfb14ab3c82c56 (diff)
Remove merging of status metric in fixup-perf-csv.py.
This patch partially reverts the commit that merged status.csv and perf.csv and filtered out failed benchmarks, thus avoiding special-casing of the status metric. Instead, the current workflow is to have the metric-computing scripts filter out successful benchmarks, and to have merge-metric-csvs.py merge the status field.

Change-Id: If21201dfbed70ab3a064c25771618331e368050f
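For reference, a minimal sketch of the merge this script no longer performs and that is now expected to happen downstream. The column names are taken from the code removed below; the file names and the exact logic in merge-metric-csvs.py are assumptions, not part of this patch:

    import pandas as pd

    # Hypothetical inputs: perf.csv as produced by the simplified
    # fixup-perf-csv.py (benchmark,symbol,sample,dso) and status.csv
    # with a per-benchmark status column (benchmark,symbol,status).
    perf_df = pd.read_csv("perf.csv")
    status_df = pd.read_csv("status.csv")

    # The status field is attached downstream, e.g. with an outer merge
    # on the benchmark/symbol keys, as the removed code below used to do.
    merged_df = pd.merge(perf_df, status_df, how="outer",
                         on=["benchmark", "symbol"])
    merged_df.to_csv("merged.csv", index=False)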
-rwxr-xr-x  fixup-perf-csv.py  54
1 file changed, 11 insertions(+), 43 deletions(-)
diff --git a/fixup-perf-csv.py b/fixup-perf-csv.py
index 4e9e834..338dd33 100755
--- a/fixup-perf-csv.py
+++ b/fixup-perf-csv.py
@@ -7,55 +7,23 @@ import pandas as pd
# (a) If dso is not present in perf.csv, it adds a dummy dso column.
# (b) We don't have dso entry present for binary and libraries, so create a dummy one.
# (c) Strip whitespace in dso name.
-# (d) Sets status field of symbol entries to 'na' and removes symbol entries
-# for failed benchmarks.
def main():
-    assert len(sys.argv) == 3
-    perf_df = pd.read_csv(sys.argv[1])
-    status_df = pd.read_csv(sys.argv[2])
-
-    if "dso" not in list(perf_df.columns.values):
-        perf_df["dso"] = "na"
-    perf_df = perf_df.fillna("na")
-    perf_df["dso"] = perf_df["dso"].str.strip()
-
-    merged_df = pd.merge(perf_df, status_df, how="outer", on=["benchmark", "symbol"])
-    # When all benchmarks have failed, perf-tmp.csv (perf_df) is empty.
-    # and this messes up order of columns while merging. Rearrange the columns
-    # to "expected order" with benchmark,symbol appearing first.
+    assert len(sys.argv) == 2
+    df = pd.read_csv(sys.argv[1])
+    if "dso" not in list(df.columns.values):
+        df["dso"] = "na"
+    df = df.fillna("na")
+    df["dso"] = df["dso"].str.strip()
+    # When all benchmarks have failed, perf.csv is empty, and this messes up
+    # order of columns while merging. Rearrange the columns to "expected order"
+    # with benchmark,symbol appearing first.
    # The order of columns shouldn't really be an issue, but we need
    # (or needed at some point) for benchmark to be the first metric, and thus
    # assert for it in merge-metric-csvs.py. This needs to be re-checked after
    # we move away from csvs2table.py.
-    merged_df = merged_df[["benchmark", "symbol", "sample", "dso", "status"]]
-
-    merged_df["sample"] = merged_df["sample"].fillna(-1).astype("int")
-    merged_df["dso"] = merged_df["dso"].fillna("na")
-    merged_df["status"] = merged_df["status"].fillna("na")
-
-    # FIXME: We end up having duplicates in status.csv for some runs.
-    # Remove duplicate entries from merged_df, if they occur in either perf-tmp.csv
-    # or status.csv.
-    merged_df = merged_df.drop_duplicates()
-
-    # Iterate over each row in merged_df, and only keep symbol
-    # entries for those bmks that ran successfully, so later metric and comparison
-    # scripts don't process symbols for failed benchmarks like LLVM-1070.
-    # FIXME: Growing a dataframe inside a loop can end up being terribly inefficient,
-    # revisit later.
-    res_df = pd.DataFrame(columns=merged_df.columns)
-    keep_symbol_row = True
-    for index, row in merged_df.iterrows():
-        if row['status'] != 'na' or keep_symbol_row:
-            res_df.loc[len(res_df)] = row
-        if row['status'] != 'na':
-            keep_symbol_row = True if row['status'] == 'success' \
-                else False
-
-    # Output perf.csv combined with status info and symbol entries removed
-    # for failed benchmarks.
-    res_df.to_csv(sys.stdout, index=False)
+    df = df[["benchmark", "symbol", "sample", "dso"]]
+    df.to_csv(sys.stdout, index=False)
if __name__ == "__main__":
    main()
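A quick before/after illustration of the patched script's behaviour; the input rows are made up, but the steps mirror the added code above:

    import io
    import pandas as pd

    # Made-up perf.csv contents without a dso column.
    raw = "benchmark,symbol,sample\nbmk1,foo,100\nbmk1,bar,50\n"
    df = pd.read_csv(io.StringIO(raw))

    # Same massaging as the patched fixup-perf-csv.py: add a dummy dso
    # column, fill missing values, strip whitespace, fix the column order.
    if "dso" not in list(df.columns.values):
        df["dso"] = "na"
    df = df.fillna("na")
    df["dso"] = df["dso"].str.strip()
    df = df[["benchmark", "symbol", "sample", "dso"]]
    print(df.to_csv(index=False))
    # benchmark,symbol,sample,dso
    # bmk1,foo,100,na
    # bmk1,bar,50,na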