Compare commits

...

2 Commits

SHA1        Message                                         Date
acc4fb6c9a  Update analyse_medians.py                       2024-06-28 09:52:29 +02:00
            - Add `--no-slice-remap`
            - facet_grid: add `separate_hthreads` option
051db5fbeb  Remove double median method                     2024-06-28 09:49:11 +02:00
2 changed files with 105 additions and 81 deletions
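Together, the two commits make slice remapping optional. A minimal sketch of how the new flag behaves once parsed; the positional `path` argument is an assumption, only `--no-slice-remap` and its argparse wiring come from this diff:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("path")  # hypothetical positional argument
parser.add_argument(
    "--no-slice-remap",
    dest="slice_remap",
    action="store_false",
    default=True,
    help="Don't remap the slices",
)

print(parser.parse_args(["run1"]).slice_remap)                      # True
print(parser.parse_args(["run1", "--no-slice-remap"]).slice_remap)  # False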

Changed file 1 of 2

@@ -3,24 +3,26 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-License-Identifier: MIT
-import pandas as pd
-import matplotlib.pyplot as plt
-import seaborn as sns
-#import tikzplotlib
-import wquantiles as wq
-import numpy as np
 import argparse
+import warnings
+import time
+import json
 import sys
 import os
-import json
-import warnings
+import matplotlib.style as mplstyle
+import matplotlib.pyplot as plt
+import wquantiles as wq
+import seaborn as sns
+import pandas as pd
+import numpy as np
+#import tikzplotlib
 
-warnings.filterwarnings('ignore')
-print("warnings are filtered, enable them back if you are having some trouble")
-sns.set_theme()
+t = time.time()
+
+def print_timed(*args, **kwargs):
+    print(f"[{round(time.time()-t, 1):>8}]", *args, **kwargs)
 
 def dict_to_json(d):
     if isinstance(d, dict):
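The new `print_timed` helper replaces plain `print` throughout the script, prefixing every message with the seconds elapsed since startup, right-aligned to eight characters. A standalone sketch of its output (the sleep is only there to make the prefix non-zero):

import time

t = time.time()

def print_timed(*args, **kwargs):
    # Prefix each message with elapsed seconds since start, width 8, right-aligned
    print(f"[{round(time.time()-t, 1):>8}]", *args, **kwargs)

time.sleep(0.2)
print_timed("Loaded columns")  # e.g. "[     0.2] Loaded columns"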
@@ -86,6 +88,9 @@ parser.add_argument(
 args = parser.parse_args()
 
+warnings.filterwarnings('ignore')
+print_timed("warnings are filtered, enable them back if you are having some trouble")
+
 img_dir = os.path.dirname(args.path)+"/figs/"
 os.makedirs(img_dir, exist_ok=True)
@@ -114,7 +119,7 @@ df = pd.read_csv(args.path + "-results_lite.csv.bz2",
                  converters={'address': convert64, 'hash': convert8},
                  )
-print(f"Loaded columns : {list(df.keys())}")
+print_timed(f"Loaded columns : {list(df.keys())}")
 
 sample_columns = [
     "clflush_remote_hit",
@@ -143,24 +148,30 @@ if args.slice_remap:
 core_mapping = pd.read_csv(args.path + ".cores.csv")
 
 def remap_core(key):
+    column = core_mapping.columns.get_loc(key)
     def remap(core):
-        remapped = core_mapping.iloc[core]
-        return remapped[key]
+        return core_mapping.iat[core, column]
     return remap
 
-df["main_socket"] = df["main_core"].apply(remap_core("socket"))
-df["main_core_fixed"] = df["main_core"].apply(remap_core("core"))
-df["main_ht"] = df["main_core"].apply(remap_core("hthread"))
-df["helper_socket"] = df["helper_core"].apply(remap_core("socket"))
-df["helper_core_fixed"] = df["helper_core"].apply(remap_core("core"))
-df["helper_ht"] = df["helper_core"].apply(remap_core("hthread"))
+columns = [
+    ("main_socket", "main_core", "socket"),
+    ("main_core_fixed", "main_core", "core"),
+    ("main_ht", "main_core", "hthread"),
+    ("helper_socket", "helper_core", "socket"),
+    ("helper_core_fixed", "helper_core", "core"),
+    ("helper_ht", "helper_core", "hthread"),
+]
+for (col, icol, key) in columns:
+    df[col] = df[icol].apply(remap_core(key))
+    print_timed(f"Column {col} added")
 
 if args.slice_remap:
     slice_remap = lambda h: slice_mapping["slice_group"].iloc[h]
     df["slice_group"] = df["hash"].apply(slice_remap)
+    print_timed(f"Column slice_group added")
 else:
     df["slice_group"] = df["hash"]
@@ -172,9 +183,10 @@ def get_graphing_bounds():
     return int(((min(q10s) - 10) // 10) * 10), int(((max(q90s) + 19) // 10) * 10)
 
+mplstyle.use("fast")
+
 graph_lower, graph_upper = get_graphing_bounds()
-print("graphing between {}, {}".format(graph_lower, graph_upper))
+print_timed(f"graphing between {graph_lower}, {graph_upper}")
 
 def plot(filename, g=None):
     if args.no_plot:
@@ -182,6 +194,7 @@ def plot(filename, g=None):
         g.savefig(img_dir+filename)
     else:
         plt.savefig(img_dir+filename)
+    print_timed(f"Saved {filename}")
     plt.close()
     plt.show()
@@ -233,32 +246,8 @@ def export_stats_csv():
         return maxi-mini
 
     def compute_stat(x, key):
-        def compute_median(x):
-            return wq.median(x["time"], x[key])
-
-        filtered_x = x[(x[key] != 0)]
-        mini, maxi = filtered_x["time"].min(), filtered_x["time"].max()
-        miss_spread = get_spread(x, "clflush_miss_n")
-        if maxi-mini < 3*miss_spread:
-            med = compute_median(x)
-            return [med, med]
-        if key == "clflush_remote_hit":
-            """print(
-                "double for core {}:{}@{}, helper {}:{}@{}".format(
-                    x["main_core_fixed"].unique()[0],
-                    x["main_ht"].unique()[0],
-                    x["main_socket"].unique()[0],
-                    x["helper_core_fixed"].unique()[0],
-                    x["helper_ht"].unique()[0],
-                    x["helper_socket"].unique()[0],
-                )
-            )"""
-        center = mini + (maxi-mini)/2
-        return [compute_median(filtered_x[(filtered_x["time"] < center)]), compute_median(filtered_x[(filtered_x["time"] >= center)])]
+        return wq.median(x["time"], x[key])
 
     df_grouped = df.groupby(["main_core", "helper_core", "hash"])
     miss = df_grouped.apply(lambda x: compute_stat(x, "clflush_miss_n"))
@@ -276,8 +265,6 @@ def export_stats_csv():
         "clflush_shared_hit": hit_shared.values
     })
 
-    stats = stats.explode(['clflush_miss_n', 'clflush_remote_hit', 'clflush_local_hit_n', 'clflush_shared_hit'])
-
     stats.to_csv(args.path + ".stats.csv", index=False)
@@ -308,4 +295,4 @@ if not args.stats:
 if not os.path.exists(args.path + ".stats.csv") or args.stats:
     export_stats_csv()
 else:
-    print("Skipping .stats.csv export")
+    print_timed("Skipping .stats.csv export")

Changed file 2 of 2

@@ -15,7 +15,9 @@ import pandas as pd
 import seaborn as sns
 from scipy import optimize
 import matplotlib.pyplot as plt
+import matplotlib.style as mplstyle
+
+mplstyle.use("fast")
 
 warnings.filterwarnings("ignore")
 print("warnings are filtered, enable them back if you are having some trouble")
@@ -54,14 +56,23 @@ parser.add_argument(
     help="Create slice{} directories with segmented grid",
 )
 
+parser.add_argument(
+    "--no-slice-remap",
+    dest="slice_remap",
+    action="store_false",
+    default=True,
+    help="Don't remap the slices"
+)
+
 args = parser.parse_args()
 
 img_dir = os.path.dirname(args.path) + "/figs/"
 os.makedirs(img_dir, exist_ok=True)
 
 assert os.path.exists(args.path + ".stats.csv")
-assert os.path.exists(args.path + ".slices.csv")
 assert os.path.exists(args.path + ".cores.csv")
+if args.slice_remap:
+    assert os.path.exists(args.path + ".slices.csv")
 
 stats = pd.read_csv(
     args.path + ".stats.csv",
@@ -84,6 +95,7 @@ stats = pd.read_csv(
     },
 )
 
-slice_mapping = pd.read_csv(args.path + ".slices.csv")
+if args.slice_remap:
+    slice_mapping = pd.read_csv(args.path + ".slices.csv")
 core_mapping = pd.read_csv(args.path + ".cores.csv")
@@ -129,9 +141,12 @@ stats["helper_ht"] = stats["helper_core"].apply(remap_core("hthread"))
 # slice_mapping = {3: 0, 1: 1, 2: 2, 0: 3}
 
-stats["slice_group"] = stats["hash"].apply(
-    lambda h: slice_mapping["slice_group"].iloc[h]
-)
+if args.slice_remap:
+    stats["slice_group"] = stats["hash"].apply(
+        lambda h: slice_mapping["slice_group"].iloc[h]
+    )
+else:
+    stats["slice_group"] = stats["hash"]
 
 graph_lower_miss = int((min_time_miss // 10) * 10)
 graph_upper_miss = int(((max_time_miss + 9) // 10) * 10)
@@ -386,14 +401,25 @@ def facet_grid(
         "clflush_miss_n",
     ],
     colors=["y", "r", "g", "b"],
+    separate_hthreads=False,
     title=None,
 ):
     """
     Creates a facet grid showing all points
     """
+    if separate_hthreads:
+        colors = ["y", "r", "g", "b"]
+        for el in shown:
+            for helper, main in itertools.product((0, 1), (0, 1)):
+                df[el+f"_m{main}h{helper}"] = df[(df["main_ht"] == main) & (df["helper_ht"] == helper)][el]
+
     grid = sns.FacetGrid(df, row=row, col=col)
     for i, el in enumerate(shown):
-        grid.map(draw_fn, third, el, color=colors[i % len(colors)])
+        if separate_hthreads:
+            for helper, main in itertools.product((0, 1), (0, 1)):
+                grid.map(draw_fn, third, el+f"_m{main}h{helper}", color=colors[(helper+2*main) % len(colors)])  # marker=['+', 'x'][helper]
+        else:
+            grid.map(draw_fn, third, el, color=colors[i % len(colors)])
     if title is not None:
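With `separate_hthreads`, the color index `(helper + 2*main) % len(colors)` gives each of the four (main_ht, helper_ht) combinations its own entry in the palette from the diff. A quick enumeration of the mapping:

import itertools

colors = ["y", "r", "g", "b"]
for helper, main in itertools.product((0, 1), (0, 1)):
    # helper + 2*main enumerates 0..3, one distinct color per combination
    print(f"m{main}h{helper} -> {colors[(helper + 2*main) % len(colors)]}")
# prints: m0h0 -> y, m1h0 -> g, m0h1 -> r, m1h1 -> b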
@@ -408,7 +434,7 @@ def all_facets(df, pre="", post="", *args, **kwargs):
     """
     facet_grid(
-        df, "main_core_fixed", "helper_core_fixed", "slice_group",
+        df, "helper_core_fixed", "main_core_fixed", "slice_group",
         title=f"{pre}facet_slice{post}.png", *args, **kwargs
     )
     facet_grid(
@@ -416,47 +442,58 @@ def all_facets(df, pre="", post="", *args, **kwargs):
         title=f"{pre}facet_main{post}.png", *args, **kwargs
     )
     facet_grid(
-        df, "slice_group", "main_core_fixed", "helper_core_fixed",
+        df, "main_core_fixed", "slice_group", "helper_core_fixed",
         title=f"{pre}facet_helper{post}.png", *args, **kwargs
     )
 
-def do_facet(main: int, helper: int, line: bool):
+def do_facet(main: int, helper: int, line: bool, metrics: str):
+    """
+    - metrics: hit, miss or all
+    """
     df = stats.copy(deep=True)
-    print(f"Doing all facets {main}x{helper}")
+    print(f"Doing all facets {main}x{helper} {metrics}")
     filtered_df = stats[
-        (stats["main_core_fixed"] // (num_core / 2) == main)
-        & (stats["helper_core_fixed"] // (num_core / 2) == helper)
+        (stats["main_socket"] == main)
+        & (stats["helper_socket"] == helper)
     ]
     method = "line" if line else "pt"
-    all_facets(
-        filtered_df,
-        pre=f"hit_{method}_",
-        post=f"_m{main}h{helper}",
-        shown=["clflush_remote_hit"],
-        colors=["r"],
-        draw_fn=sns.lineplot if line else sns.scatterplot
-    )
-    all_facets(
-        filtered_df,
-        pre=f"miss_{method}_",
-        post=f"_m{main}h{helper}",
-        shown=["clflush_miss_n"],
-        colors=["b"],
-        draw_fn=sns.lineplot if line else sns.scatterplot
-    )
+    shown = []
+    colors = []
+    if metrics == "hit" or metrics == "all":
+        shown.append("clflush_remote_hit")
+        colors.append("r")
+    if metrics == "miss" or metrics == "all":
+        shown.append("clflush_miss_n")
+        colors.append("b")
+    all_facets(
+        filtered_df,
+        pre=f"{metrics}_{method}_",
+        post=f"_m{main}h{helper}",
+        shown=shown,
+        colors=colors,
+        draw_fn=sns.lineplot if line else sns.scatterplot
+    )
 
 if args.rslice:
     rslice()
 
 # do_predictions(stats)
-# all_facets(stats, "")
+# all_facets(stats, shown=["clflush_remote_hit"], colors=["r"])
 
 with Pool(8) as pool:
-    pool.starmap(do_facet, itertools.product((0, 1), (0, 1), (True, False)))
+    pool.starmap(
+        do_facet,
+        itertools.product(
+            stats["main_socket"].unique(),
+            stats["helper_socket"].unique(),
+            (True, False),
+            ("hit", "miss")
+        )
+    )
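The worker grid fed to `pool.starmap` is now data-driven: socket values come from the stats frame instead of the hard-coded `(0, 1)`, and the new `metrics` axis doubles the job count. A sketch of what the product enumerates, assuming a two-socket machine (the socket values stand in for the `.unique()` calls):

import itertools

main_sockets = (0, 1)    # stand-in for stats["main_socket"].unique()
helper_sockets = (0, 1)  # stand-in for stats["helper_socket"].unique()

jobs = list(itertools.product(
    main_sockets,
    helper_sockets,
    (True, False),    # line plots vs. point ("pt") plots
    ("hit", "miss"),  # the new metrics argument to do_facet
))
print(len(jobs))  # 16 do_facet calls; the old hard-coded grid produced 8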