Analysis scripts

This commit is contained in:
GuillaumeDIDIER 2020-08-04 14:34:45 +02:00
parent a2d494d610
commit b747c64d16
3 changed files with 293 additions and 0 deletions

View File

@ -0,0 +1,15 @@
#!/bin/sh
NAME=`basename "$1" .txt.bz2`
echo $NAME
#bzcat $1 | awk '/^Iteration [:digit:]*[.]*/ ' > "${NAME}-iterations.txt"
#rm "${NAME}-results.csv.bz2"
#TODO forward NAME to awk script
#awk -v logname="${NAME}" -f `dirname $0`/analyse_iterations.awk < "${NAME}-iterations.txt" | bzip2 -c > "${NAME}-results.csv.bz2" # This uses system to split off awk scripts doing the analysis
bzgrep "RESULT:" "$1" | cut -b 8- | bzip2 -c > "${NAME}-results.csv.bz2"
# remove line with no data points
bzgrep -v -e "0,0,0,0,0,0,0,0,0,0$" "${NAME}-results.csv.bz2" | bzip2 -c > "${NAME}-results_lite.csv.bz2"
#paste -d"," *.csv > combined.csv

View File

@ -0,0 +1,146 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sys import exit
import wquantiles as wq
import numpy as np
from functools import partial
import sys
def convert64(x):
return np.int64(int(x, base=16))
def convert8(x):
return np.int8(int(x, base=16))
df = pd.read_csv(sys.argv[1],
dtype={
"main_core": np.int8,
"helper_core": np.int8,
# "address": int,
# "hash": np.int8,
"time": np.int16,
"clflush_remote_hit": np.int32,
"clflush_shared_hit": np.int32,
"clflush_miss_f": np.int32,
"clflush_local_hit_f": np.int32,
"clflush_miss_n": np.int32,
"clflush_local_hit_n": np.int32,
"reload_miss": np.int32,
"reload_remote_hit": np.int32,
"reload_shared_hit": np.int32,
"reload_local_hit": np.int32},
converters={'address': convert64, 'hash': convert8},
)
sample_columns = [
"clflush_remote_hit",
"clflush_shared_hit",
"clflush_miss_f",
"clflush_local_hit_f",
"clflush_miss_n",
"clflush_local_hit_n",
"reload_miss",
"reload_remote_hit",
"reload_shared_hit",
"reload_local_hit",
]
sample_flush_columns = [
"clflush_remote_hit",
"clflush_shared_hit",
"clflush_miss_f",
"clflush_local_hit_f",
"clflush_miss_n",
"clflush_local_hit_n",
]
print(df.columns)
#df["Hash"] = df["Addr"].apply(lambda x: (x >> 15)&0x3)
print(df.head())
print(df["hash"].unique())
min_time = df["time"].min()
max_time = df["time"].max()
q10s = [wq.quantile(df["time"], df[col], 0.1) for col in sample_flush_columns]
q90s = [wq.quantile(df["time"], df[col], 0.9) for col in sample_flush_columns]
graph_upper = int(((max(q90s) + 19) // 10) * 10)
graph_lower = int(((min(q10s) - 10) // 10) * 10)
# graph_lower = (min_time // 10) * 10
# graph_upper = ((max_time + 9) // 10) * 10
print("graphing between {}, {}".format(graph_lower, graph_upper))
df_main_core_0 = df[df["main_core"] == 0]
#df_helper_core_0 = df[df["helper_core"] == 0]
g = sns.FacetGrid(df_main_core_0, col="helper_core", row="hash", legend_out=True)
g2 = sns.FacetGrid(df, col="main_core", row="hash", legend_out=True)
colours = ["b", "r", "g", "y"]
def custom_hist(x, *y, **kwargs):
for (i, yi) in enumerate(y):
kwargs["color"] = colours[i]
sns.distplot(x, range(graph_lower, graph_upper), hist_kws={"weights": yi, "histtype":"step"}, kde=False, **kwargs)
# Color convention here :
# Blue = miss
# Red = Remote Hit
# Green = Local Hit
# Yellow = Shared Hit
g.map(custom_hist, "time", "clflush_miss_n", "clflush_remote_hit", "clflush_local_hit_n", "clflush_shared_hit")
g2.map(custom_hist, "time", "clflush_miss_n", "clflush_remote_hit", "clflush_local_hit_n", "clflush_shared_hit")
# g.map(sns.distplot, "time", hist_kws={"weights": df["clflush_hit"]}, kde=False)
#plt.show()
#plt.figure()
def stat(x, key):
return wq.median(x["time"], x[key])
miss = df.groupby(["main_core", "helper_core", "hash"]).apply(stat, "clflush_miss_n")
hit_remote = df.groupby(["main_core", "helper_core", "hash"]).apply(stat, "clflush_remote_hit")
hit_local = df.groupby(["main_core", "helper_core", "hash"]).apply(stat, "clflush_local_hit_n")
hit_shared = df.groupby(["main_core", "helper_core", "hash"]).apply(stat, "clflush_shared_hit")
stats = miss.reset_index()
stats.columns = ["main_core", "helper_core", "hash", "clflush_miss_n"]
stats["clflush_remote_hit"] = hit_remote.values
stats["clflush_local_hit_n"] = hit_local.values
stats["clflush_shared_hit"] = hit_shared.values
stats.to_csv(sys.argv[1] + ".stats", index=False)
print(stats.to_string())
plt.show()
exit(0)
g = sns.FacetGrid(stats, row="Core")
g.map(sns.distplot, 'Miss', bins=range(100, 480), color="r")
g.map(sns.distplot, 'Hit', bins=range(100, 480))
plt.show()
#stats["clflush_miss_med"] = stats[[0]].apply(lambda x: x["miss_med"])
#stats["clflush_hit_med"] = stats[[0]].apply(lambda x: x["hit_med"])
#del df[[0]]
#print(hit.to_string(), miss.to_string())
# test = pd.DataFrame({"value" : [0, 5], "weight": [5, 1]})
# plt.figure()
# sns.distplot(test["value"], hist_kws={"weights": test["weight"]}, kde=False)
exit(0)

View File

@ -0,0 +1,132 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sys import exit
import numpy as np
from scipy import optimize
import sys
# TODO
# sys.argv[1] should be the root
# with root-result_lite.csv.bz2 the result
# and .stats.csv
# root.slices a slice mapping - done
# root.cores a core + socket mapping - done -> move to analyse csv ?
#
# Facet plot with actual dot cloud + plot the linear regression
# each row is a slice
# each row is an origin core
# each column a helper core if applicable
stats = pd.read_csv(sys.argv[1] + ".stats.csv",
dtype={
"main_core": np.int8,
"helper_core": np.int8,
# "address": int,
"hash": np.int8,
# "time": np.int16,
"clflush_remote_hit": np.float64,
"clflush_shared_hit": np.float64,
# "clflush_miss_f": np.int32,
# "clflush_local_hit_f": np.int32,
"clflush_miss_n": np.float64,
"clflush_local_hit_n": np.float64,
# "reload_miss": np.int32,
# "reload_remote_hit": np.int32,
# "reload_shared_hit": np.int32,
# "reload_local_hit": np.int32
}
)
slice_mapping = pd.read_csv(sys.argv[1] + ".slices.csv")
core_mapping = pd.read_csv(sys.argv[1] + ".cores.csv")
print(core_mapping.to_string())
print(slice_mapping.to_string())
print("core {} is mapped to '{}'".format(4, repr(core_mapping.iloc[4])))
min_time_miss = stats["clflush_miss_n"].min()
max_time_miss = stats["clflush_miss_n"].max()
def remap_core(key):
def remap(core):
remapped = core_mapping.iloc[core]
return remapped[key]
return remap
stats["main_socket"] = stats["main_core"].apply(remap_core("socket"))
stats["main_core_fixed"] = stats["main_core"].apply(remap_core("core"))
stats["main_ht"] = stats["main_core"].apply(remap_core("hthread"))
stats["helper_socket"] = stats["helper_core"].apply(remap_core("socket"))
stats["helper_core_fixed"] = stats["helper_core"].apply(remap_core("core"))
stats["helper_ht"] = stats["helper_core"].apply(remap_core("hthread"))
# slice_mapping = {3: 0, 1: 1, 2: 2, 0: 3}
stats["slice_group"] = stats["hash"].apply(lambda h: slice_mapping.iloc[h])
graph_lower_miss = int((min_time_miss // 10) * 10)
graph_upper_miss = int(((max_time_miss + 9) // 10) * 10)
print("Graphing from {} to {}".format(graph_lower_miss, graph_upper_miss))
g = sns.FacetGrid(stats, row="main_core_fixed")
g.map(sns.scatterplot, 'slice_group', 'clflush_miss_n', color="b")
g.map(sns.scatterplot, 'slice_group', 'clflush_local_hit_n', color="g")
g2 = sns.FacetGrid(stats, row="main_core_fixed", col="slice_group")
g2.map(sns.scatterplot, 'helper_core_fixed', 'clflush_remote_hit', color="r")
g3 = sns.FacetGrid(stats, row="main_core_fixed", col="slice_group")
g3.map(sns.scatterplot, 'helper_core_fixed', 'clflush_shared_hit', color="y")
print(stats.head())
def miss_topology(x, C, h):
main_core = x["main_core_fixed"]
slice_group = x["slice_group"]
return C + h * abs(main_core - slice_group) + h * abs(slice_group + 1)
res = optimize.curve_fit(miss_topology, stats[["main_core_fixed", "slice_group"]], stats["clflush_miss_n"])
print(res)
def local_hit_topology(x, C, h):
main_core = x["main_core_fixed"]
slice_group = x["slice_group"]
return C + h * abs(main_core - slice_group)
def remote_hit_topology_1(x, C, h):
main_core = x["main_core_fixed"]
slice_group = x["slice_group"]
helper_core = x["helper_core_fixed"]
return C + h * abs(main_core - slice_group) + h * abs(slice_group - helper_core)
def remote_hit_topology_2(x, C, h):
main_core = x["main_core_fixed"]
slice_group = x["slice_group"]
helper_core = x["helper_core_fixed"]
return C + h * abs(main_core - slice_group) + h * abs(slice_group - helper_core) + h * abs(helper_core - main_core)
def shared_hit_topology_1(x, C, h):
main_core = x["main_core_fixed"]
slice_group = x["slice_group"]
helper_core = x["helper_core_fixed"]
return C + h * abs(main_core - slice_group) + h * max(abs(slice_group - main_core), abs(slice_group - helper_core))
# more ideas needed
plt.show()