about | summary | refs | log | tree | commit | diff
path: root/fixup-perf-csv.py
diff options
context:
space:
mode:
Diffstat (limited to 'fixup-perf-csv.py')
-rwxr-xr-x fixup-perf-csv.py 54
1 files changed, 11 insertions, 43 deletions
diff --git a/fixup-perf-csv.py b/fixup-perf-csv.py
index 4e9e834..338dd33 100755
--- a/fixup-perf-csv.py
+++ b/fixup-perf-csv.py
@@ -7,55 +7,23 @@ import pandas as pd
# (a) If dso is not present in perf.csv, it adds a dummy dso column.
# (b) We don't have dso entry present for binary and libraries, so create a dummy one.
# (c) Strip whitespace in dso name.
-# (d) Sets status field of symbol entries to 'na' and removes symbol entries
-# for failed benchmarks.
def main():
- assert len(sys.argv) == 3
- perf_df = pd.read_csv(sys.argv[1])
- status_df = pd.read_csv(sys.argv[2])
-
- if "dso" not in list(perf_df.columns.values):
- perf_df["dso"] = "na"
- perf_df = perf_df.fillna("na")
- perf_df["dso"] = perf_df["dso"].str.strip()
-
- merged_df = pd.merge(perf_df, status_df, how="outer", on=["benchmark", "symbol"])
- # When all benchmarks have failed, perf-tmp.csv (perf_df) is empty.
- # and this messes up order of columns while merging. Rearrange the columns
- # to "expected order" with benchmark,symbol appearing first.
+ assert len(sys.argv) == 2
+ df = pd.read_csv(sys.argv[1])
+ if "dso" not in list(df.columns.values):
+ df["dso"] = "na"
+ df = df.fillna("na")
+ df["dso"] = df["dso"].str.strip()
+ # When all benchmarks have failed, perf.csv is empty, and this messes up
+ # order of columns while merging. Rearrange the columns to "expected order"
+ # with benchmark,symbol appearing first.
# The order of columns shouldn't really be an issue, but we need
# (or needed at some point) for benchmark to be the first metric, and thus
# assert for it in merge-metric-csvs.py. This needs to be re-checked after
# we move away from csvs2table.py.
- merged_df = merged_df[["benchmark", "symbol", "sample", "dso", "status"]]
-
- merged_df["sample"] = merged_df["sample"].fillna(-1).astype("int")
- merged_df["dso"] = merged_df["dso"].fillna("na")
- merged_df["status"] = merged_df["status"].fillna("na")
-
- # FIXME: We end up having duplicates in status.csv for some runs.
- # Remove duplicate entries from merged_df, if they occur in either perf-tmp.csv
- # or status.csv.
- merged_df = merged_df.drop_duplicates()
-
- # Iterate over each row in merged_df, and only keep symbol
- # entries for those bmks that ran successfully, so later metric and comparison
- # scripts don't process symbols for failed benchmarks like LLVM-1070.
- # FIXME: Growing a dataframe inside a loop can end up being terribly inefficient,
- # revisit later.
- res_df = pd.DataFrame(columns=merged_df.columns)
- keep_symbol_row = True
- for index, row in merged_df.iterrows():
- if row['status'] != 'na' or keep_symbol_row:
- res_df.loc[len(res_df)] = row
- if row['status'] != 'na':
- keep_symbol_row = True if row['status'] == 'success' \
- else False
-
- # Output perf.csv combined with status info and symbol entries removed
- # for failed benchmarks.
- res_df.to_csv(sys.stdout, index=False)
+ df = df[["benchmark", "symbol", "sample", "dso"]]
+ df.to_csv(sys.stdout, index=False)
if __name__ == "__main__":
main()