From b747c64d160b190cc925efe4b6f95e0920f78668 Mon Sep 17 00:00:00 2001
From: GuillaumeDIDIER
Date: Tue, 4 Aug 2020 14:34:45 +0200
Subject: [PATCH] Analysis scripts

---
 cache_utils/2T-opt-2020-07-31/analyse.sh     |  15 ++
 cache_utils/2T-opt-2020-07-31/analyse_csv.py | 146 ++++++++++++++++++
 .../2T-opt-2020-07-31/analyse_medians.py     | 132 ++++++++++++++++
 3 files changed, 293 insertions(+)
 create mode 100755 cache_utils/2T-opt-2020-07-31/analyse.sh
 create mode 100644 cache_utils/2T-opt-2020-07-31/analyse_csv.py
 create mode 100644 cache_utils/2T-opt-2020-07-31/analyse_medians.py

diff --git a/cache_utils/2T-opt-2020-07-31/analyse.sh b/cache_utils/2T-opt-2020-07-31/analyse.sh
new file mode 100755
index 0000000..1715eb9
--- /dev/null
+++ b/cache_utils/2T-opt-2020-07-31/analyse.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+NAME=`basename "$1" .txt.bz2`
+echo $NAME
+
+#bzcat $1 | awk '/^Iteration [:digit:]*[.]*/ ' > "${NAME}-iterations.txt"
+#rm "${NAME}-results.csv.bz2"
+#TODO forward NAME to awk script
+#awk -v logname="${NAME}" -f `dirname $0`/analyse_iterations.awk < "${NAME}-iterations.txt" | bzip2 -c > "${NAME}-results.csv.bz2" # This uses system to split off awk scripts doing the analysis
+
+bzgrep "RESULT:" "$1" | cut -b 8- | bzip2 -c > "${NAME}-results.csv.bz2"
+
+# remove lines with no data points
+bzgrep -v -e "0,0,0,0,0,0,0,0,0,0$" "${NAME}-results.csv.bz2" | bzip2 -c > "${NAME}-results_lite.csv.bz2"
+#paste -d"," *.csv > combined.csv
diff --git a/cache_utils/2T-opt-2020-07-31/analyse_csv.py b/cache_utils/2T-opt-2020-07-31/analyse_csv.py
new file mode 100644
index 0000000..78fd7e5
--- /dev/null
+++ b/cache_utils/2T-opt-2020-07-31/analyse_csv.py
@@ -0,0 +1,146 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sys import exit
+import wquantiles as wq
+import numpy as np
+
+from functools import partial
+
+import sys
+
+
+def convert64(x):
+    return np.int64(int(x, base=16))
+
+
+def convert8(x):
+    return np.int8(int(x, base=16))
+
+
+df = pd.read_csv(sys.argv[1],
+                 dtype={
+                     "main_core": np.int8,
+                     "helper_core": np.int8,
+                     # "address": int,
+                     # "hash": np.int8,
+                     "time": np.int16,
+                     "clflush_remote_hit": np.int32,
+                     "clflush_shared_hit": np.int32,
+                     "clflush_miss_f": np.int32,
+                     "clflush_local_hit_f": np.int32,
+                     "clflush_miss_n": np.int32,
+                     "clflush_local_hit_n": np.int32,
+                     "reload_miss": np.int32,
+                     "reload_remote_hit": np.int32,
+                     "reload_shared_hit": np.int32,
+                     "reload_local_hit": np.int32},
+                 converters={'address': convert64, 'hash': convert8},
+                 )
+
+sample_columns = [
+    "clflush_remote_hit",
+    "clflush_shared_hit",
+    "clflush_miss_f",
+    "clflush_local_hit_f",
+    "clflush_miss_n",
+    "clflush_local_hit_n",
+    "reload_miss",
+    "reload_remote_hit",
+    "reload_shared_hit",
+    "reload_local_hit",
+]
+
+sample_flush_columns = [
+    "clflush_remote_hit",
+    "clflush_shared_hit",
+    "clflush_miss_f",
+    "clflush_local_hit_f",
+    "clflush_miss_n",
+    "clflush_local_hit_n",
+]
+print(df.columns)
+#df["Hash"] = df["Addr"].apply(lambda x: (x >> 15) & 0x3)
+
+print(df.head())
+
+print(df["hash"].unique())
+
+min_time = df["time"].min()
+max_time = df["time"].max()
+
+q10s = [wq.quantile(df["time"], df[col], 0.1) for col in sample_flush_columns]
+q90s = [wq.quantile(df["time"], df[col], 0.9) for col in sample_flush_columns]
+
+graph_upper = int(((max(q90s) + 19) // 10) * 10)
+graph_lower = int(((min(q10s) - 10) // 10) * 10)
+# graph_lower = (min_time // 10) * 10
+# graph_upper = ((max_time + 9) // 10) * 10
+
+print("graphing between {}, {}".format(graph_lower, graph_upper))
+
+df_main_core_0 = df[df["main_core"] == 0]
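+# Note on the q10/q90 bounds above: each row of the CSV stores, for one
+# (main_core, helper_core, address, time) tuple, how many measurements of each
+# event type landed on that time value, so wq.quantile(df["time"], df[col], q)
+# is a quantile of the time axis weighted by those counts. A minimal
+# illustration with made-up numbers (not from this dataset):
+#   times = pd.Series([100, 120, 300])
+#   counts = pd.Series([5, 90, 5])
+#   wq.quantile(times, counts, 0.5)  # ~120, where most of the weight sits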
df[df["main_core"] == 0] +#df_helper_core_0 = df[df["helper_core"] == 0] + +g = sns.FacetGrid(df_main_core_0, col="helper_core", row="hash", legend_out=True) +g2 = sns.FacetGrid(df, col="main_core", row="hash", legend_out=True) + + +colours = ["b", "r", "g", "y"] + +def custom_hist(x, *y, **kwargs): + for (i, yi) in enumerate(y): + kwargs["color"] = colours[i] + sns.distplot(x, range(graph_lower, graph_upper), hist_kws={"weights": yi, "histtype":"step"}, kde=False, **kwargs) + +# Color convention here : +# Blue = miss +# Red = Remote Hit +# Green = Local Hit +# Yellow = Shared Hit + +g.map(custom_hist, "time", "clflush_miss_n", "clflush_remote_hit", "clflush_local_hit_n", "clflush_shared_hit") + +g2.map(custom_hist, "time", "clflush_miss_n", "clflush_remote_hit", "clflush_local_hit_n", "clflush_shared_hit") + +# g.map(sns.distplot, "time", hist_kws={"weights": df["clflush_hit"]}, kde=False) + +#plt.show() +#plt.figure() + + + +def stat(x, key): + return wq.median(x["time"], x[key]) + + +miss = df.groupby(["main_core", "helper_core", "hash"]).apply(stat, "clflush_miss_n") +hit_remote = df.groupby(["main_core", "helper_core", "hash"]).apply(stat, "clflush_remote_hit") +hit_local = df.groupby(["main_core", "helper_core", "hash"]).apply(stat, "clflush_local_hit_n") +hit_shared = df.groupby(["main_core", "helper_core", "hash"]).apply(stat, "clflush_shared_hit") + +stats = miss.reset_index() +stats.columns = ["main_core", "helper_core", "hash", "clflush_miss_n"] +stats["clflush_remote_hit"] = hit_remote.values +stats["clflush_local_hit_n"] = hit_local.values +stats["clflush_shared_hit"] = hit_shared.values + +stats.to_csv(sys.argv[1] + ".stats", index=False) + +print(stats.to_string()) + +plt.show() +exit(0) +g = sns.FacetGrid(stats, row="Core") + +g.map(sns.distplot, 'Miss', bins=range(100, 480), color="r") +g.map(sns.distplot, 'Hit', bins=range(100, 480)) +plt.show() + +#stats["clflush_miss_med"] = stats[[0]].apply(lambda x: x["miss_med"]) +#stats["clflush_hit_med"] = stats[[0]].apply(lambda x: x["hit_med"]) +#del df[[0]] +#print(hit.to_string(), miss.to_string()) + +# test = pd.DataFrame({"value" : [0, 5], "weight": [5, 1]}) +# plt.figure() +# sns.distplot(test["value"], hist_kws={"weights": test["weight"]}, kde=False) + +exit(0) diff --git a/cache_utils/2T-opt-2020-07-31/analyse_medians.py b/cache_utils/2T-opt-2020-07-31/analyse_medians.py new file mode 100644 index 0000000..c846bc1 --- /dev/null +++ b/cache_utils/2T-opt-2020-07-31/analyse_medians.py @@ -0,0 +1,132 @@ +import pandas as pd +import matplotlib.pyplot as plt +import seaborn as sns +from sys import exit +import numpy as np +from scipy import optimize +import sys + +# TODO +# sys.argv[1] should be the root +# with root-result_lite.csv.bz2 the result +# and .stats.csv +# root.slices a slice mapping - done +# root.cores a core + socket mapping - done -> move to analyse csv ? 
+
+
+stats = pd.read_csv(sys.argv[1] + ".stats.csv",
+                    dtype={
+                        "main_core": np.int8,
+                        "helper_core": np.int8,
+                        # "address": int,
+                        "hash": np.int8,
+                        # "time": np.int16,
+                        "clflush_remote_hit": np.float64,
+                        "clflush_shared_hit": np.float64,
+                        # "clflush_miss_f": np.int32,
+                        # "clflush_local_hit_f": np.int32,
+                        "clflush_miss_n": np.float64,
+                        "clflush_local_hit_n": np.float64,
+                        # "reload_miss": np.int32,
+                        # "reload_remote_hit": np.int32,
+                        # "reload_shared_hit": np.int32,
+                        # "reload_local_hit": np.int32
+                    }
+                    )
+
+slice_mapping = pd.read_csv(sys.argv[1] + ".slices.csv")
+core_mapping = pd.read_csv(sys.argv[1] + ".cores.csv")
+
+print(core_mapping.to_string())
+print(slice_mapping.to_string())
+
+print("core {} is mapped to '{}'".format(4, repr(core_mapping.iloc[4])))
+
+min_time_miss = stats["clflush_miss_n"].min()
+max_time_miss = stats["clflush_miss_n"].max()
+
+
+def remap_core(key):
+    def remap(core):
+        remapped = core_mapping.iloc[core]
+        return remapped[key]
+
+    return remap
+
+
+stats["main_socket"] = stats["main_core"].apply(remap_core("socket"))
+stats["main_core_fixed"] = stats["main_core"].apply(remap_core("core"))
+stats["main_ht"] = stats["main_core"].apply(remap_core("hthread"))
+stats["helper_socket"] = stats["helper_core"].apply(remap_core("socket"))
+stats["helper_core_fixed"] = stats["helper_core"].apply(remap_core("core"))
+stats["helper_ht"] = stats["helper_core"].apply(remap_core("hthread"))
+
+# slice_mapping = {3: 0, 1: 1, 2: 2, 0: 3}
+
+# NB: slice_mapping.iloc[h] selects a whole row; if the file has more than one
+# column, the slice column should be selected explicitly.
+stats["slice_group"] = stats["hash"].apply(lambda h: slice_mapping.iloc[h])
+
+graph_lower_miss = int((min_time_miss // 10) * 10)
+graph_upper_miss = int(((max_time_miss + 9) // 10) * 10)
+
+print("Graphing from {} to {}".format(graph_lower_miss, graph_upper_miss))
+
+g = sns.FacetGrid(stats, row="main_core_fixed")
+
+g.map(sns.scatterplot, 'slice_group', 'clflush_miss_n', color="b")
+g.map(sns.scatterplot, 'slice_group', 'clflush_local_hit_n', color="g")
+
+g2 = sns.FacetGrid(stats, row="main_core_fixed", col="slice_group")
+g2.map(sns.scatterplot, 'helper_core_fixed', 'clflush_remote_hit', color="r")
+
+g3 = sns.FacetGrid(stats, row="main_core_fixed", col="slice_group")
+g3.map(sns.scatterplot, 'helper_core_fixed', 'clflush_shared_hit', color="y")
+
+print(stats.head())
+
+
+def miss_topology(x, C, h):
+    main_core = x["main_core_fixed"]
+    slice_group = x["slice_group"]
+    return C + h * abs(main_core - slice_group) + h * abs(slice_group + 1)
+
+
+# curve_fit passes a DataFrame xdata through to the model unchanged (only
+# list/tuple/ndarray inputs are converted), so indexing by column name works.
+res = optimize.curve_fit(miss_topology, stats[["main_core_fixed", "slice_group"]], stats["clflush_miss_n"])
+print(res)
+
+
+def local_hit_topology(x, C, h):
+    main_core = x["main_core_fixed"]
+    slice_group = x["slice_group"]
+    return C + h * abs(main_core - slice_group)
+
+
+def remote_hit_topology_1(x, C, h):
+    main_core = x["main_core_fixed"]
+    slice_group = x["slice_group"]
+    helper_core = x["helper_core_fixed"]
+    return C + h * abs(main_core - slice_group) + h * abs(slice_group - helper_core)
+
+
+def remote_hit_topology_2(x, C, h):
+    main_core = x["main_core_fixed"]
+    slice_group = x["slice_group"]
+    helper_core = x["helper_core_fixed"]
+    return C + h * abs(main_core - slice_group) + h * abs(slice_group - helper_core) + h * abs(helper_core - main_core)
+
+
+def shared_hit_topology_1(x, C, h):
+    main_core = x["main_core_fixed"]
+    slice_group = x["slice_group"]
+    helper_core = x["helper_core_fixed"]
+    # np.maximum is element-wise; the builtin max() would raise on pandas Series
+    return C + h * abs(main_core - slice_group) + h * np.maximum(abs(slice_group - main_core), abs(slice_group - helper_core))
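+# Sketch (not part of the original analysis): the hit models above could be
+# fitted and ranked the same way as miss_topology, e.g. by comparing the sum
+# of squared residuals of each fit:
+# for f, col in ((remote_hit_topology_1, "clflush_remote_hit"),
+#                (remote_hit_topology_2, "clflush_remote_hit"),
+#                (shared_hit_topology_1, "clflush_shared_hit")):
+#     params, _ = optimize.curve_fit(f, stats, stats[col])
+#     residual = stats[col] - f(stats, *params)
+#     print(f.__name__, params, (residual ** 2).sum())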
+
+
+# more ideas needed
+
+plt.show()
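+# Sketch (assumption, not in the original): res holds (popt, pcov) from the
+# miss fit above, so the fitted line could be overlaid on the miss scatter:
+# C, h = res[0]
+# stats["miss_pred"] = miss_topology(stats, C, h)
+# g4 = sns.FacetGrid(stats, row="main_core_fixed")
+# g4.map(sns.lineplot, "slice_group", "miss_pred", color="k")
+# plt.show()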