From 2610b0ea1dad2dc26e5eeb132983118188faf79c Mon Sep 17 00:00:00 2001
From: augustin64
Date: Fri, 21 Jun 2024 17:34:07 +0200
Subject: [PATCH] Add --no-slice-remap

Split data if needed
---
 cache_utils/analyse_csv.py | 92 +++++++++++++++++++++++++++++---------
 1 file changed, 70 insertions(+), 22 deletions(-)

diff --git a/cache_utils/analyse_csv.py b/cache_utils/analyse_csv.py
index b8cdf6c..69373d7 100644
--- a/cache_utils/analyse_csv.py
+++ b/cache_utils/analyse_csv.py
@@ -76,12 +76,21 @@ parser.add_argument(
     help="Don't compute figures, just create .stats.csv file"
 )
 
+parser.add_argument(
+    "--no-slice-remap",
+    dest="slice_remap",
+    action="store_false",
+    default=True,
+    help="Don't remap the slices"
+)
+
 args = parser.parse_args()
 
 img_dir = os.path.dirname(args.path)+"/figs/"
 os.makedirs(img_dir, exist_ok=True)
 
-assert os.path.exists(args.path + ".slices.csv")
+if args.slice_remap:
+    assert os.path.exists(args.path + ".slices.csv")
 assert os.path.exists(args.path + ".cores.csv")
 assert os.path.exists(args.path + "-results_lite.csv.bz2")
 
@@ -129,8 +138,8 @@ sample_flush_columns = [
     "clflush_local_hit_n",
 ]
 
-
-slice_mapping = pd.read_csv(args.path + ".slices.csv")
+if args.slice_remap:
+    slice_mapping = pd.read_csv(args.path + ".slices.csv")
 core_mapping = pd.read_csv(args.path + ".cores.csv")
 
 def remap_core(key):
@@ -149,8 +158,11 @@ df["helper_core_fixed"] = df["helper_core"].apply(remap_core("core"))
 df["helper_ht"] = df["helper_core"].apply(remap_core("hthread"))
 
 
-slice_remap = lambda h: slice_mapping["slice_group"].iloc[h]
-df["slice_group"] = df["hash"].apply(slice_remap)
+if args.slice_remap:
+    slice_remap = lambda h: slice_mapping["slice_group"].iloc[h]
+    df["slice_group"] = df["hash"].apply(slice_remap)
+else:
+    df["slice_group"] = df["hash"]
 
 
 def get_graphing_bounds():
@@ -215,20 +227,56 @@ def show_grid(df, col, row, shown=["clflush_miss_n", "clflush_remote_hit", "clfl
     return g
 
 def export_stats_csv():
-    def stat(x, key):
-        return wq.median(x["time"], x[key])
+    def get_spread(df, key):
+        filtered_df = df[(df[key] != 0)]
+        mini, maxi = filtered_df["time"].min(), filtered_df["time"].max()
+        return maxi-mini
+
+    def compute_stat(x, key):
+        def compute_median(x):
+            return wq.median(x["time"], x[key])
+
+        filtered_x = x[(x[key] != 0)]
+        mini, maxi = filtered_x["time"].min(), filtered_x["time"].max()
+
+        miss_spread = get_spread(x, "clflush_miss_n")
+
+        if maxi-mini < 3*miss_spread:
+            med = compute_median(x)
+            return [med, med]
+
+        if key == "clflush_remote_hit":
+            """print(
+                "double for core {}:{}@{}, helper {}:{}@{}".format(
+                    x["main_core_fixed"].unique()[0],
+                    x["main_ht"].unique()[0],
+                    x["main_socket"].unique()[0],
+                    x["helper_core_fixed"].unique()[0],
+                    x["helper_ht"].unique()[0],
+                    x["helper_socket"].unique()[0],
+                )
+            )"""
+        center = mini + (maxi-mini)/2
+        return [compute_median(filtered_x[(filtered_x["time"] < center)]), compute_median(filtered_x[(filtered_x["time"] >= center)])]
 
     df_grouped = df.groupby(["main_core", "helper_core", "hash"])
+
+    miss = df_grouped.apply(lambda x: compute_stat(x, "clflush_miss_n"))
+    hit_remote = df_grouped.apply(lambda x: compute_stat(x, "clflush_remote_hit"))
+    hit_local = df_grouped.apply(lambda x: compute_stat(x, "clflush_local_hit_n"))
+    hit_shared = df_grouped.apply(lambda x: compute_stat(x, "clflush_shared_hit"))
 
-    miss = df_grouped.apply(stat, "clflush_miss_n")
-    hit_remote = df_grouped.apply(stat, "clflush_remote_hit")
-    hit_local = df_grouped.apply(stat, "clflush_local_hit_n")
-    hit_shared = df_grouped.apply(stat, "clflush_shared_hit")
 
-    stats = miss.reset_index()
-    stats.columns = ["main_core", "helper_core", "hash", "clflush_miss_n"]
-    stats["clflush_remote_hit"] = hit_remote.values
-    stats["clflush_local_hit_n"] = hit_local.values
-    stats["clflush_shared_hit"] = hit_shared.values
+    stats = pd.DataFrame({
+        "main_core": miss.index.get_level_values(0),
+        "helper_core": miss.index.get_level_values(1),
+        "hash": miss.index.get_level_values(2),
+        "clflush_miss_n": miss.values,
+        "clflush_remote_hit": hit_remote.values,
+        "clflush_local_hit_n": hit_local.values,
+        "clflush_shared_hit": hit_shared.values
+    })
+
+    stats = stats.explode(['clflush_miss_n', 'clflush_remote_hit', 'clflush_local_hit_n', 'clflush_shared_hit'])
 
     stats.to_csv(args.path + ".stats.csv", index=False)
 
@@ -247,14 +295,14 @@ if not args.stats:
 
     df_main_core_0 = df[df["main_core"] == 0]
    df_main_core_0.loc[:, ("hash",)] = df["hash"].apply(dict_to_json)
 
-    g = show_grid(df_main_core_0, "helper_core", "hash")
-    plot("helper_grid.png", g=g)
+    g = show_grid(df_main_core_0, "helper_core", "hash", shown=["clflush_miss_n", "clflush_remote_hit"])
+    plot("grid_helper_dual.png", g=g)
 
-    g = show_grid(df, "main_core", "hash")
-    plot("main_grid.png", g=g)
+    g = show_grid(df, "main_core", "hash", shown=["clflush_miss_n", "clflush_remote_hit"])
+    plot("grid_main_dual.png", g=g)
 
-    g = show_grid(df, "main_core", "helper_core")
-    plot("main_helper_grid.png", g=g)
+    g = show_grid(df, "main_core", "helper_core", shown=["clflush_miss_n", "clflush_remote_hit"])
+    plot("grid_main_helper_dual.png", g=g)
 
 if not os.path.exists(args.path + ".stats.csv") or args.stats: