From 545d07dd5e0a8074b560d4ac395bcf98c0ffeb48 Mon Sep 17 00:00:00 2001
From: augustin64
Date: Mon, 17 Jun 2024 16:39:11 +0200
Subject: [PATCH] Add analyse_reorder.py

---
 cache_utils/analyse_medians.py |  19 ++-
 cache_utils/analyse_reorder.py | 177 +++++++++++++++++++++++++++++++++
 2 files changed, 188 insertions(+), 8 deletions(-)
 create mode 100644 cache_utils/analyse_reorder.py

diff --git a/cache_utils/analyse_medians.py b/cache_utils/analyse_medians.py
index a1faaa7..2e18880 100644
--- a/cache_utils/analyse_medians.py
+++ b/cache_utils/analyse_medians.py
@@ -7,6 +7,8 @@ import os
 import sys
 import argparse
 import warnings
+import itertools
+from multiprocessing import Pool
 
 import numpy as np
 import pandas as pd
@@ -419,12 +421,6 @@ def all_facets(df, pre="", post="", *args, **kwargs):
     )
 
 
-if args.rslice:
-    rslice()
-
-# do_predictions(stats)
-# all_facets(stats, "")
-
 def do_facet(main: int, helper: int, line: bool):
     df = stats.copy(deep=True)
 
@@ -451,8 +447,15 @@ def do_facet(main: int, helper: int, line: bool):
         draw_fn=sns.lineplot if line else sns.scatterplot
     )
 
-from multiprocessing import Pool
-import itertools
+
+
+if args.rslice:
+    rslice()
+
+# do_predictions(stats)
+# all_facets(stats, "")
+
+
 with Pool(8) as pool:
     pool.starmap(do_facet, itertools.product((0, 1), (0, 1), (True, False)))
 
diff --git a/cache_utils/analyse_reorder.py b/cache_utils/analyse_reorder.py
new file mode 100644
index 0000000..fd991de
--- /dev/null
+++ b/cache_utils/analyse_reorder.py
@@ -0,0 +1,177 @@
+import os
+import sys
+import argparse
+import itertools
+
+import numpy as np
+import pandas as pd
+
+
+parser = argparse.ArgumentParser(
+    prog=sys.argv[0],
+)
+parser.add_argument("path", help="Path to the experiment files")
+args = parser.parse_args()
+
+
+assert os.path.exists(args.path + ".stats.csv")
+assert os.path.exists(args.path + ".cores.csv")
+
+stats = pd.read_csv(
+    args.path + ".stats.csv",
+    dtype={
+        "main_core": np.int8,
+        "helper_core": np.int8,
+        # "address": int,
+        "hash": np.int8,
+        # "time": np.int16,
+        "clflush_remote_hit": np.float64,
+        "clflush_shared_hit": np.float64,
+        # "clflush_miss_f": np.int32,
+        # "clflush_local_hit_f": np.int32,
+        "clflush_miss_n": np.float64,
+        "clflush_local_hit_n": np.float64,
+        # "reload_miss": np.int32,
+        # "reload_remote_hit": np.int32,
+        # "reload_shared_hit": np.int32,
+        # "reload_local_hit": np.int32
+    },
+)
+
+core_mapping = pd.read_csv(args.path + ".cores.csv")
+
+cores = list(stats["main_core"].unique())
+slices = list(stats["hash"].unique())
+
+
+def slice_reorder(df, fst_slice, params=None):
+    """
+    Find a slice ordering that minimizes the distance from each slice to the next
+    - df : pandas dataframe
+    - fst_slice : first slice to use in the ordering
+    - params : columns to use (clflush_miss_n, clflush_remote_hit, ...)
+    """
+    if params is None:
+        params = ["clflush_miss_n", "clflush_remote_hit"]
+
+    keys = slices.copy()
+    sliced_df = {
+        i: df[(df["hash"] == i)] for i in keys
+    }
+
+    def distance(df1, df2):
+        # Sum of squared differences between the median timings of the
+        # two slices, over every (main_core, helper_core) pair
+        dist = 0
+        for param in params:
+            for core, helper in itertools.product(cores, cores):
+                med1 = df1[(df1["main_core"] == core) & (df1["helper_core"] == helper)][param].median()
+                med2 = df2[(df2["main_core"] == core) & (df2["helper_core"] == helper)][param].median()
+                dist += (med1 - med2)**2
+
+        return dist
+
+    def find_nearest(from_slice):
+        distances = {i: distance(sliced_df[from_slice], sliced_df[i]) for i in keys}
+        nearest = min(keys, key=lambda x: distances[x])
+        return nearest, distances[nearest]
+
+    # Greedy chaining: repeatedly append the remaining slice closest
+    # to the last one added
+    new_reorder = [fst_slice]
+    total_dist = 0
+    keys.remove(fst_slice)
+    for _ in range(len(slices) - 1):
+        nxt, dist = find_nearest(new_reorder[-1])
+        total_dist += dist
+        new_reorder.append(nxt)
+        keys.remove(nxt)
+
+    print("slice_group")
+    print("\n".join([
+        str(new_reorder.index(i)) for i in range(len(slices))
+    ]))
+
+    return total_dist
+
+
+def core_reorder(df, fst_core, params=None, position="both", lcores=None):
+    """
+    Find a core ordering that minimizes the distance from each core to the next
+    - df : pandas dataframe
+    - fst_core : first core to use in the ordering
+    - params : columns to use (clflush_miss_n, clflush_remote_hit, ...)
+    - position : both, helper, or main
+    - lcores : subset of cores to reorder (e.g. one socket only)
+    """
+    from_main = position in ("both", "main")
+    from_helper = position in ("both", "helper")
+
+    if params is None:
+        params = ["clflush_miss_n", "clflush_remote_hit"]
+
+    if lcores is None:
+        lcores = cores.copy()
+
+    lcores.sort()
+    keys = lcores.copy()
+    print(keys)
+    main_sliced_df = {
+        i: df[(df["main_core"] == i)] for i in keys
+    }
+    helper_sliced_df = {
+        i: df[(df["helper_core"] == i)] for i in keys
+    }
+
+    def distance(df1, df2, is_main=True):
+        # Frames sliced by main core are compared across helper cores
+        # (and vice versa), for every slice hash
+        dist = 0
+        for param in params:
+            for hash, core in itertools.product(slices, lcores):
+                col = "helper_core" if is_main else "main_core"
+
+                med1 = df1[(df1["hash"] == hash) & (df1[col] == core)][param].median()
+                med2 = df2[(df2["hash"] == hash) & (df2[col] == core)][param].median()
+                dist += (med1 - med2)**2
+
+        return dist
+
+    def find_nearest(from_core):
+        distances = {i: 0 for i in keys}
+        for i in distances:
+            if from_main:
+                distances[i] += distance(main_sliced_df[from_core], main_sliced_df[i], is_main=True)
+            if from_helper:
+                distances[i] += distance(helper_sliced_df[from_core], helper_sliced_df[i], is_main=False)
+
+        nearest = min(keys, key=lambda x: distances[x])
+        return nearest, distances[nearest]
+
+    # Greedy chaining, as in slice_reorder
+    new_reorder = [fst_core]
+    total_dist = 0
+    keys.remove(fst_core)
+    for _ in range(len(lcores) - 1):
+        nxt, dist = find_nearest(new_reorder[-1])
+        total_dist += dist
+        new_reorder.append(nxt)
+        keys.remove(nxt)
+
+    print("core")
+    print("\n".join([
+        str(lcores[new_reorder.index(i)]) for i in lcores
+    ]))
+    return total_dist
+
+
+for hash in slices:
+    res = slice_reorder(stats, hash)
+    print(f"hash: {hash}, total dist: {res}")
+
+# Assume core IDs split into two contiguous halves, one per socket,
+# and reorder each socket's cores separately
+half = len(cores) // 2
+for core in cores:
+    res = core_reorder(stats, core, lcores=[k for k in cores if (k // half == core // half)])
+    print(f"core: {core}, total dist: {res}")
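
Reviewer note: slice_reorder and core_reorder are two instances of the same
greedy nearest-neighbour chaining: start from a fixed element, then repeatedly
append the remaining element whose median profile is closest to the last one
added. A minimal self-contained sketch of that loop (the function name and the
toy distance are illustrative, not part of the patch):

    def greedy_chain(elements, first, dist):
        # Start at `first`, then repeatedly append the remaining element
        # closest to the last one added, accumulating the chained distance.
        remaining = [e for e in elements if e != first]
        order, total = [first], 0.0
        while remaining:
            nxt = min(remaining, key=lambda e: dist(order[-1], e))
            total += dist(order[-1], nxt)
            order.append(nxt)
            remaining.remove(nxt)
        return order, total

    # Toy usage: order 1-D points by proximity, starting from 0.
    order, total = greedy_chain([0, 5, 1, 9, 3], 0, lambda a, b: (a - b) ** 2)
    print(order, total)  # [0, 1, 3, 5, 9] 25.0

Like the patch, this greedily minimizes each step rather than the length of the
whole chain, so it is a heuristic, not an exact minimum-distance ordering.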
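The final loop assumes core IDs run from 0 to len(cores) - 1 with one
contiguous half per socket, so k // half == core // half keeps each core
grouped with the other cores of its socket. A quick illustration under that
assumption (toy core numbering, not from the experiment data):

    cores = list(range(8))          # two sockets of four cores each
    half = len(cores) // 2
    for core in cores:
        same_socket = [k for k in cores if k // half == core // half]
        print(core, same_socket)    # cores 0-3 -> [0, 1, 2, 3], 4-7 -> [4, 5, 6, 7]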