From 545d07dd5e0a8074b560d4ac395bcf98c0ffeb48 Mon Sep 17 00:00:00 2001
From: augustin64
Date: Mon, 17 Jun 2024 16:39:11 +0200
Subject: [PATCH] Add analyse_reorder.py

---
 cache_utils/analyse_medians.py |  19 ++-
 cache_utils/analyse_reorder.py | 177 +++++++++++++++++++++++++++++++++
 2 files changed, 188 insertions(+), 8 deletions(-)
 create mode 100644 cache_utils/analyse_reorder.py

diff --git a/cache_utils/analyse_medians.py b/cache_utils/analyse_medians.py
index a1faaa7..2e18880 100644
--- a/cache_utils/analyse_medians.py
+++ b/cache_utils/analyse_medians.py
@@ -7,6 +7,8 @@ import os
 import sys
 import argparse
 import warnings
+import itertools
+from multiprocessing import Pool
 
 import numpy as np
 import pandas as pd
@@ -419,12 +421,6 @@ def all_facets(df, pre="", post="", *args, **kwargs):
     )
 
 
-if args.rslice:
-    rslice()
-
-# do_predictions(stats)
-# all_facets(stats, "")
-
 def do_facet(main: int, helper: int, line: bool):
     df = stats.copy(deep=True)
 
@@ -451,8 +447,15 @@ def do_facet(main: int, helper: int, line: bool):
         draw_fn=sns.lineplot if line else sns.scatterplot
     )
 
-from multiprocessing import Pool
-import itertools
+
+
+if args.rslice:
+    rslice()
+
+# do_predictions(stats)
+# all_facets(stats, "")
+
+
 with Pool(8) as pool:
     pool.starmap(do_facet, itertools.product((0, 1), (0, 1), (True, False)))
 
diff --git a/cache_utils/analyse_reorder.py b/cache_utils/analyse_reorder.py
new file mode 100644
index 0000000..fd991de
--- /dev/null
+++ b/cache_utils/analyse_reorder.py
@@ -0,0 +1,177 @@
+import os
+import sys
+import argparse
+import itertools
+
+import numpy as np
+import pandas as pd
+
+
+parser = argparse.ArgumentParser(
+    prog=sys.argv[0],
+)
+parser.add_argument("path", help="Path to the experiment files")
+args = parser.parse_args()
+
+
+assert os.path.exists(args.path + ".stats.csv")
+assert os.path.exists(args.path + ".cores.csv")
+
+stats = pd.read_csv(
+    args.path + ".stats.csv",
+    dtype={
+        "main_core": np.int8,
+        "helper_core": np.int8,
+        # "address": int,
+        "hash": np.int8,
+        # "time": np.int16,
+        "clflush_remote_hit": np.float64,
+        "clflush_shared_hit": np.float64,
+        # "clflush_miss_f": np.int32,
+        # "clflush_local_hit_f": np.int32,
+        "clflush_miss_n": np.float64,
+        "clflush_local_hit_n": np.float64,
+        # "reload_miss": np.int32,
+        # "reload_remote_hit": np.int32,
+        # "reload_shared_hit": np.int32,
+        # "reload_local_hit": np.int32
+    },
+)
+
+core_mapping = pd.read_csv(args.path + ".cores.csv")
+
+cores = list(stats["main_core"].unique())
+slices = list(stats["hash"].unique())
+
+
+def slice_reorder(df, fst_slice, params=None):
+    """
+    Find a slice ordering that minimizes the distance from each slice to the next
+    - df : pandas dataframe
+    - fst_slice : first slice to use in the ordering
+    - params : columns to use (clflush_miss_n, clflush_remote_hit, ...)
+    """
+    if params is None:
+        params = ["clflush_miss_n", "clflush_remote_hit"]
+
+    keys = slices.copy()
+    sliced_df = {
+        i: df[(df["hash"] == i)] for i in keys
+    }
+
+    def distance(df1, df2):
+        # Sum of squared differences between the median timings of the
+        # two slices, over every (main_core, helper_core) pair
+        dist = 0
+        for param in params:
+            for core, helper in itertools.product(cores, cores):
+                med1 = df1[(df1["main_core"] == core) & (df1["helper_core"] == helper)][param].median()
+                med2 = df2[(df2["main_core"] == core) & (df2["helper_core"] == helper)][param].median()
+                dist += (med1 - med2)**2
+
+        return dist
+
+    def find_nearest(from_slice):
+        distances = {i: distance(sliced_df[from_slice], sliced_df[i]) for i in keys}
+        nearest = min(keys, key=lambda x: distances[x])
+        return nearest, distances[nearest]
+
+    # Greedy chaining: repeatedly append the remaining slice closest
+    # to the last one added
+    new_reorder = [fst_slice]
+    total_dist = 0
+    keys.remove(fst_slice)
+    for _ in range(len(slices) - 1):
+        nxt, dist = find_nearest(new_reorder[-1])
+        total_dist += dist
+        new_reorder.append(nxt)
+        keys.remove(nxt)
+
+    print("slice_group")
+    print("\n".join([
+        str(new_reorder.index(i)) for i in range(len(slices))
+    ]))
+
+    return total_dist
+
+
+def core_reorder(df, fst_core, params=None, position="both", lcores=None):
+    """
+    Find a core ordering that minimizes the distance from each core to the next
+    - df : pandas dataframe
+    - fst_core : first core to use in the ordering
+    - params : columns to use (clflush_miss_n, clflush_remote_hit, ...)
+    - position : both, helper, or main
+    - lcores : subset of cores to reorder (e.g. one socket only)
+    """
+    from_main = position in ("both", "main")
+    from_helper = position in ("both", "helper")
+
+    if params is None:
+        params = ["clflush_miss_n", "clflush_remote_hit"]
+
+    if lcores is None:
+        lcores = cores.copy()
+
+    lcores.sort()
+    keys = lcores.copy()
+    print(keys)
+    main_sliced_df = {
+        i: df[(df["main_core"] == i)] for i in keys
+    }
+    helper_sliced_df = {
+        i: df[(df["helper_core"] == i)] for i in keys
+    }
+
+    def distance(df1, df2, is_main=True):
+        # Frames sliced by main core are compared across helper cores
+        # (and vice versa), for every slice hash
+        dist = 0
+        for param in params:
+            for hash, core in itertools.product(slices, lcores):
+                col = "helper_core" if is_main else "main_core"
+
+                med1 = df1[(df1["hash"] == hash) & (df1[col] == core)][param].median()
+                med2 = df2[(df2["hash"] == hash) & (df2[col] == core)][param].median()
+                dist += (med1 - med2)**2
+
+        return dist
+
+    def find_nearest(from_core):
+        distances = {i: 0 for i in keys}
+        for i in distances:
+            if from_main:
+                distances[i] += distance(main_sliced_df[from_core], main_sliced_df[i], is_main=True)
+            if from_helper:
+                distances[i] += distance(helper_sliced_df[from_core], helper_sliced_df[i], is_main=False)
+
+        nearest = min(keys, key=lambda x: distances[x])
+        return nearest, distances[nearest]
+
+    # Greedy chaining, as in slice_reorder
+    new_reorder = [fst_core]
+    total_dist = 0
+    keys.remove(fst_core)
+    for _ in range(len(lcores) - 1):
+        nxt, dist = find_nearest(new_reorder[-1])
+        total_dist += dist
+        new_reorder.append(nxt)
+        keys.remove(nxt)
+
+    print("core")
+    print("\n".join([
+        str(lcores[new_reorder.index(i)]) for i in lcores
+    ]))
+    return total_dist
+
+
+for hash in slices:
+    res = slice_reorder(stats, hash)
+    print(f"hash: {hash}, total dist: {res}")
+
+# Assume core IDs split into two contiguous halves, one per socket,
+# and reorder each socket's cores separately
+half = len(cores) // 2
+for core in cores:
+    res = core_reorder(stats, core, lcores=[k for k in cores if (k // half == core // half)])
+    print(f"core: {core}, total dist: {res}")
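
Reviewer note: slice_reorder and core_reorder are two instances of the same
greedy nearest-neighbour chaining: start from a fixed element, then repeatedly
append the remaining element whose median profile is closest to the last one
added. A minimal self-contained sketch of that loop (the function name and the
toy distance are illustrative, not part of the patch):

    def greedy_chain(elements, first, dist):
        # Start at `first`, then repeatedly append the remaining element
        # closest to the last one added, accumulating the chained distance.
        remaining = [e for e in elements if e != first]
        order, total = [first], 0.0
        while remaining:
            nxt = min(remaining, key=lambda e: dist(order[-1], e))
            total += dist(order[-1], nxt)
            order.append(nxt)
            remaining.remove(nxt)
        return order, total

    # Toy usage: order 1-D points by proximity, starting from 0.
    order, total = greedy_chain([0, 5, 1, 9, 3], 0, lambda a, b: (a - b) ** 2)
    print(order, total)  # [0, 1, 3, 5, 9] 25.0

Like the patch, this greedily minimizes each step rather than the length of the
whole chain, so it is a heuristic, not an exact minimum-distance ordering.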
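The final loop assumes core IDs run from 0 to len(cores) - 1 with one
contiguous half per socket, so k // half == core // half keeps each core
grouped with the other cores of its socket. A quick illustration under that
assumption (toy core numbering, not from the experiment data):

    cores = list(range(8))          # two sockets of four cores each
    half = len(cores) // 2
    for core in cores:
        same_socket = [k for k in cores if k // half == core // half]
        print(core, same_socket)    # cores 0-3 -> [0, 1, 2, 3], 4-7 -> [4, 5, 6, 7]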