From b747c64d160b190cc925efe4b6f95e0920f78668 Mon Sep 17 00:00:00 2001
From: GuillaumeDIDIER
Date: Tue, 4 Aug 2020 14:34:45 +0200
Subject: [PATCH] Analysis scripts

---
 cache_utils/2T-opt-2020-07-31/analyse.sh     |  15 ++
 cache_utils/2T-opt-2020-07-31/analyse_csv.py | 146 ++++++++++++++++++
 .../2T-opt-2020-07-31/analyse_medians.py     | 132 ++++++++++++++++
 3 files changed, 293 insertions(+)
 create mode 100755 cache_utils/2T-opt-2020-07-31/analyse.sh
 create mode 100644 cache_utils/2T-opt-2020-07-31/analyse_csv.py
 create mode 100644 cache_utils/2T-opt-2020-07-31/analyse_medians.py

diff --git a/cache_utils/2T-opt-2020-07-31/analyse.sh b/cache_utils/2T-opt-2020-07-31/analyse.sh
new file mode 100755
index 0000000..1715eb9
--- /dev/null
+++ b/cache_utils/2T-opt-2020-07-31/analyse.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+NAME=`basename "$1" .txt.bz2`
+echo $NAME
+
+#bzcat $1 | awk '/^Iteration [:digit:]*[.]*/ ' > "${NAME}-iterations.txt"
+#rm "${NAME}-results.csv.bz2"
+#TODO forward NAME to awk script
+#awk -v logname="${NAME}" -f `dirname $0`/analyse_iterations.awk < "${NAME}-iterations.txt" | bzip2 -c > "${NAME}-results.csv.bz2" # This uses system to split off awk scripts doing the analysis
+
+bzgrep "RESULT:" "$1" | cut -b 8- | bzip2 -c > "${NAME}-results.csv.bz2"
+
+# remove lines with no data points
+bzgrep -v -e "0,0,0,0,0,0,0,0,0,0$" "${NAME}-results.csv.bz2" | bzip2 -c > "${NAME}-results_lite.csv.bz2"
+#paste -d"," *.csv > combined.csv
diff --git a/cache_utils/2T-opt-2020-07-31/analyse_csv.py b/cache_utils/2T-opt-2020-07-31/analyse_csv.py
new file mode 100644
index 0000000..78fd7e5
--- /dev/null
+++ b/cache_utils/2T-opt-2020-07-31/analyse_csv.py
@@ -0,0 +1,146 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sys import exit
+import wquantiles as wq
+import numpy as np
+
+from functools import partial
+
+import sys
+
+
+def convert64(x):
+    return np.int64(int(x, base=16))
+
+
+def convert8(x):
+    return np.int8(int(x, base=16))
+
+
+df = pd.read_csv(sys.argv[1],
+                 dtype={
+                     "main_core": np.int8,
+                     "helper_core": np.int8,
+                     # "address": int,
+                     # "hash": np.int8,
+                     "time": np.int16,
+                     "clflush_remote_hit": np.int32,
+                     "clflush_shared_hit": np.int32,
+                     "clflush_miss_f": np.int32,
+                     "clflush_local_hit_f": np.int32,
+                     "clflush_miss_n": np.int32,
+                     "clflush_local_hit_n": np.int32,
+                     "reload_miss": np.int32,
+                     "reload_remote_hit": np.int32,
+                     "reload_shared_hit": np.int32,
+                     "reload_local_hit": np.int32},
+                 converters={'address': convert64, 'hash': convert8},
+                 )
+
+sample_columns = [
+    "clflush_remote_hit",
+    "clflush_shared_hit",
+    "clflush_miss_f",
+    "clflush_local_hit_f",
+    "clflush_miss_n",
+    "clflush_local_hit_n",
+    "reload_miss",
+    "reload_remote_hit",
+    "reload_shared_hit",
+    "reload_local_hit",
+]
+
+sample_flush_columns = [
+    "clflush_remote_hit",
+    "clflush_shared_hit",
+    "clflush_miss_f",
+    "clflush_local_hit_f",
+    "clflush_miss_n",
+    "clflush_local_hit_n",
+]
+print(df.columns)
+#df["Hash"] = df["Addr"].apply(lambda x: (x >> 15) & 0x3)
+
+print(df.head())
+
+print(df["hash"].unique())
+
+min_time = df["time"].min()
+max_time = df["time"].max()
+
+q10s = [wq.quantile(df["time"], df[col], 0.1) for col in sample_flush_columns]
+q90s = [wq.quantile(df["time"], df[col], 0.9) for col in sample_flush_columns]
+
+graph_upper = int(((max(q90s) + 19) // 10) * 10)
+graph_lower = int(((min(q10s) - 10) // 10) * 10)
+# graph_lower = (min_time // 10) * 10
+# graph_upper = ((max_time + 9) // 10) * 10
+
+print("graphing between {}, {}".format(graph_lower, graph_upper))
+
+df_main_core_0 = df[df["main_core"] == 0]
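+# Note on the q10/q90 bounds above: each row of the CSV stores, for one
+# (main_core, helper_core, address, time) tuple, how many measurements of each
+# event type landed on that time value, so wq.quantile(df["time"], df[col], q)
+# is a quantile of the time axis weighted by those counts. A minimal
+# illustration with made-up numbers (not from this dataset):
+#   times = pd.Series([100, 120, 300])
+#   counts = pd.Series([5, 90, 5])
+#   wq.quantile(times, counts, 0.5)  # ~120, where most of the weight sits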
df[df["main_core"] == 0] +#df_helper_core_0 = df[df["helper_core"] == 0] + +g = sns.FacetGrid(df_main_core_0, col="helper_core", row="hash", legend_out=True) +g2 = sns.FacetGrid(df, col="main_core", row="hash", legend_out=True) + + +colours = ["b", "r", "g", "y"] + +def custom_hist(x, *y, **kwargs): + for (i, yi) in enumerate(y): + kwargs["color"] = colours[i] + sns.distplot(x, range(graph_lower, graph_upper), hist_kws={"weights": yi, "histtype":"step"}, kde=False, **kwargs) + +# Color convention here : +# Blue = miss +# Red = Remote Hit +# Green = Local Hit +# Yellow = Shared Hit + +g.map(custom_hist, "time", "clflush_miss_n", "clflush_remote_hit", "clflush_local_hit_n", "clflush_shared_hit") + +g2.map(custom_hist, "time", "clflush_miss_n", "clflush_remote_hit", "clflush_local_hit_n", "clflush_shared_hit") + +# g.map(sns.distplot, "time", hist_kws={"weights": df["clflush_hit"]}, kde=False) + +#plt.show() +#plt.figure() + + + +def stat(x, key): + return wq.median(x["time"], x[key]) + + +miss = df.groupby(["main_core", "helper_core", "hash"]).apply(stat, "clflush_miss_n") +hit_remote = df.groupby(["main_core", "helper_core", "hash"]).apply(stat, "clflush_remote_hit") +hit_local = df.groupby(["main_core", "helper_core", "hash"]).apply(stat, "clflush_local_hit_n") +hit_shared = df.groupby(["main_core", "helper_core", "hash"]).apply(stat, "clflush_shared_hit") + +stats = miss.reset_index() +stats.columns = ["main_core", "helper_core", "hash", "clflush_miss_n"] +stats["clflush_remote_hit"] = hit_remote.values +stats["clflush_local_hit_n"] = hit_local.values +stats["clflush_shared_hit"] = hit_shared.values + +stats.to_csv(sys.argv[1] + ".stats", index=False) + +print(stats.to_string()) + +plt.show() +exit(0) +g = sns.FacetGrid(stats, row="Core") + +g.map(sns.distplot, 'Miss', bins=range(100, 480), color="r") +g.map(sns.distplot, 'Hit', bins=range(100, 480)) +plt.show() + +#stats["clflush_miss_med"] = stats[[0]].apply(lambda x: x["miss_med"]) +#stats["clflush_hit_med"] = stats[[0]].apply(lambda x: x["hit_med"]) +#del df[[0]] +#print(hit.to_string(), miss.to_string()) + +# test = pd.DataFrame({"value" : [0, 5], "weight": [5, 1]}) +# plt.figure() +# sns.distplot(test["value"], hist_kws={"weights": test["weight"]}, kde=False) + +exit(0) diff --git a/cache_utils/2T-opt-2020-07-31/analyse_medians.py b/cache_utils/2T-opt-2020-07-31/analyse_medians.py new file mode 100644 index 0000000..c846bc1 --- /dev/null +++ b/cache_utils/2T-opt-2020-07-31/analyse_medians.py @@ -0,0 +1,132 @@ +import pandas as pd +import matplotlib.pyplot as plt +import seaborn as sns +from sys import exit +import numpy as np +from scipy import optimize +import sys + +# TODO +# sys.argv[1] should be the root +# with root-result_lite.csv.bz2 the result +# and .stats.csv +# root.slices a slice mapping - done +# root.cores a core + socket mapping - done -> move to analyse csv ? 
+
+
+stats = pd.read_csv(sys.argv[1] + ".stats.csv",
+                    dtype={
+                        "main_core": np.int8,
+                        "helper_core": np.int8,
+                        # "address": int,
+                        "hash": np.int8,
+                        # "time": np.int16,
+                        "clflush_remote_hit": np.float64,
+                        "clflush_shared_hit": np.float64,
+                        # "clflush_miss_f": np.int32,
+                        # "clflush_local_hit_f": np.int32,
+                        "clflush_miss_n": np.float64,
+                        "clflush_local_hit_n": np.float64,
+                        # "reload_miss": np.int32,
+                        # "reload_remote_hit": np.int32,
+                        # "reload_shared_hit": np.int32,
+                        # "reload_local_hit": np.int32
+                    }
+                    )
+
+slice_mapping = pd.read_csv(sys.argv[1] + ".slices.csv")
+core_mapping = pd.read_csv(sys.argv[1] + ".cores.csv")
+
+print(core_mapping.to_string())
+print(slice_mapping.to_string())
+
+print("core {} is mapped to '{}'".format(4, repr(core_mapping.iloc[4])))
+
+min_time_miss = stats["clflush_miss_n"].min()
+max_time_miss = stats["clflush_miss_n"].max()
+
+
+def remap_core(key):
+    def remap(core):
+        remapped = core_mapping.iloc[core]
+        return remapped[key]
+
+    return remap
+
+
+stats["main_socket"] = stats["main_core"].apply(remap_core("socket"))
+stats["main_core_fixed"] = stats["main_core"].apply(remap_core("core"))
+stats["main_ht"] = stats["main_core"].apply(remap_core("hthread"))
+stats["helper_socket"] = stats["helper_core"].apply(remap_core("socket"))
+stats["helper_core_fixed"] = stats["helper_core"].apply(remap_core("core"))
+stats["helper_ht"] = stats["helper_core"].apply(remap_core("hthread"))
+
+# slice_mapping = {3: 0, 1: 1, 2: 2, 0: 3}
+
+# NB: slice_mapping.iloc[h] selects a whole row; if the file has more than one
+# column, the slice column should be selected explicitly.
+stats["slice_group"] = stats["hash"].apply(lambda h: slice_mapping.iloc[h])
+
+graph_lower_miss = int((min_time_miss // 10) * 10)
+graph_upper_miss = int(((max_time_miss + 9) // 10) * 10)
+
+print("Graphing from {} to {}".format(graph_lower_miss, graph_upper_miss))
+
+g = sns.FacetGrid(stats, row="main_core_fixed")
+
+g.map(sns.scatterplot, 'slice_group', 'clflush_miss_n', color="b")
+g.map(sns.scatterplot, 'slice_group', 'clflush_local_hit_n', color="g")
+
+g2 = sns.FacetGrid(stats, row="main_core_fixed", col="slice_group")
+g2.map(sns.scatterplot, 'helper_core_fixed', 'clflush_remote_hit', color="r")
+
+g3 = sns.FacetGrid(stats, row="main_core_fixed", col="slice_group")
+g3.map(sns.scatterplot, 'helper_core_fixed', 'clflush_shared_hit', color="y")
+
+print(stats.head())
+
+
+def miss_topology(x, C, h):
+    main_core = x["main_core_fixed"]
+    slice_group = x["slice_group"]
+    return C + h * abs(main_core - slice_group) + h * abs(slice_group + 1)
+
+
+# curve_fit passes a DataFrame xdata through to the model unchanged (only
+# list/tuple/ndarray inputs are converted), so indexing by column name works.
+res = optimize.curve_fit(miss_topology, stats[["main_core_fixed", "slice_group"]], stats["clflush_miss_n"])
+print(res)
+
+
+def local_hit_topology(x, C, h):
+    main_core = x["main_core_fixed"]
+    slice_group = x["slice_group"]
+    return C + h * abs(main_core - slice_group)
+
+
+def remote_hit_topology_1(x, C, h):
+    main_core = x["main_core_fixed"]
+    slice_group = x["slice_group"]
+    helper_core = x["helper_core_fixed"]
+    return C + h * abs(main_core - slice_group) + h * abs(slice_group - helper_core)
+
+
+def remote_hit_topology_2(x, C, h):
+    main_core = x["main_core_fixed"]
+    slice_group = x["slice_group"]
+    helper_core = x["helper_core_fixed"]
+    return C + h * abs(main_core - slice_group) + h * abs(slice_group - helper_core) + h * abs(helper_core - main_core)
+
+
+def shared_hit_topology_1(x, C, h):
+    main_core = x["main_core_fixed"]
+    slice_group = x["slice_group"]
+    helper_core = x["helper_core_fixed"]
+    # np.maximum is element-wise; the builtin max() would raise on pandas Series
+    return C + h * abs(main_core - slice_group) + h * np.maximum(abs(slice_group - main_core), abs(slice_group - helper_core))
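+# Sketch (not part of the original analysis): the hit models above could be
+# fitted and ranked the same way as miss_topology, e.g. by comparing the sum
+# of squared residuals of each fit:
+# for f, col in ((remote_hit_topology_1, "clflush_remote_hit"),
+#                (remote_hit_topology_2, "clflush_remote_hit"),
+#                (shared_hit_topology_1, "clflush_shared_hit")):
+#     params, _ = optimize.curve_fit(f, stats, stats[col])
+#     residual = stats[col] - f(stats, *params)
+#     print(f.__name__, params, (residual ** 2).sum())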
+
+
+# more ideas needed
+
+plt.show()
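+# Sketch (assumption, not in the original): res holds (popt, pcov) from the
+# miss fit above, so the fitted line could be overlaid on the miss scatter:
+# C, h = res[0]
+# stats["miss_pred"] = miss_topology(stats, C, h)
+# g4 = sns.FacetGrid(stats, row="main_core_fixed")
+# g4.map(sns.lineplot, "slice_group", "miss_pred", color="k")
+# plt.show()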