From 26b2d2294263e6db6539662dd2a5cab9a8d77489 Mon Sep 17 00:00:00 2001
From: GuillaumeDIDIER
Date: Fri, 1 May 2020 10:24:15 +0200
Subject: [PATCH] Update result analysis scripts and code to the version for
 results-2020-04-20

---
 .idea/DendrobatesTinctoriusAzureus.iml        |  5 +-
 .../results-2020-04-07-5c025fb/analyse_csv.py | 35 ++++++++-
 cache_utils/results-2020-04-17/analyse.sh     |  6 ++
 cache_utils/results-2020-04-17/analyse_csv.py | 63 ++++++++++++++++
 .../results-2020-04-17/analyse_iterations.awk | 16 ++++
 cache_utils/results-2020-04-20/analyse.sh     |  6 ++
 cache_utils/results-2020-04-20/analyse_csv.py | 52 +++++++++++++
 .../results-2020-04-20/analyse_iterations.awk | 16 ++++
 cache_utils/src/calibration.rs                | 75 +++++++++++++------
 cache_utils/src/lib.rs                        |  1 +
 cache_utils/src/main.rs                       | 27 ++++++-
 requirements.txt                              |  3 +
 12 files changed, 279 insertions(+), 26 deletions(-)
 create mode 100755 cache_utils/results-2020-04-17/analyse.sh
 create mode 100644 cache_utils/results-2020-04-17/analyse_csv.py
 create mode 100644 cache_utils/results-2020-04-17/analyse_iterations.awk
 create mode 100755 cache_utils/results-2020-04-20/analyse.sh
 create mode 100644 cache_utils/results-2020-04-20/analyse_csv.py
 create mode 100644 cache_utils/results-2020-04-20/analyse_iterations.awk
 create mode 100644 requirements.txt

diff --git a/.idea/DendrobatesTinctoriusAzureus.iml b/.idea/DendrobatesTinctoriusAzureus.iml
index da99253..9f85fe3 100644
--- a/.idea/DendrobatesTinctoriusAzureus.iml
+++ b/.idea/DendrobatesTinctoriusAzureus.iml
@@ -2,7 +2,7 @@
-
+
@@ -35,10 +35,11 @@
+
-
+
\ No newline at end of file
diff --git a/cache_utils/results-2020-04-07-5c025fb/analyse_csv.py b/cache_utils/results-2020-04-07-5c025fb/analyse_csv.py
index ea6415f..f629126 100644
--- a/cache_utils/results-2020-04-07-5c025fb/analyse_csv.py
+++ b/cache_utils/results-2020-04-07-5c025fb/analyse_csv.py
@@ -1,4 +1,6 @@
-import pandas
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
 
 columns = ["Addr", "Hash"]
 core_number = 8 # FIXME
@@ -7,8 +9,37 @@ for i in range(0, core_number):
         for op in ["Hit", "Miss"]:
             columns.append(op + str(i) + stat)
 columns.append("Hmm")
-df = pandas.read_csv("citron-vert/combined.csv", header=0, names=columns)
+df = pd.read_csv("citron-vert/combined.csv", header=0, names=columns)
 
 selected_columns = columns[:-1]
 df = df[selected_columns]
 print(df.head())
+median_columns = list(filter(lambda s: s.endswith("Med"), columns))
+
+median_hits_col = list(filter(lambda s: s.startswith("Hit"), median_columns))
+median_miss_col = list(filter(lambda s: s.startswith("Miss"), median_columns))
+
+print(list(median_columns))
+print(list(median_hits_col), list(median_miss_col))
+
+hashes = df["Hash"].drop_duplicates()
+print(hashes)
+
+#def distrib(x, y, **kwargs):
+#    sns.distplot()
+
+separate_core_df = df.melt(id_vars=["Addr", "Hash"], value_vars=median_hits_col)
+
+g = sns.FacetGrid(separate_core_df, row="variable")
+g.map(sns.distplot, "value")
+plt.figure()
+
+separate_core_df = df.melt(id_vars=["Addr", "Hash"], value_vars=median_miss_col)
+g = sns.FacetGrid(separate_core_df, row="variable")
+g.map(sns.distplot, "value", hist_kws={"range":(75,115)})
+
+plt.show()
+
+#sns.distplot(df["values"], hist_kws={"weights": df["count"]})
+
+
diff --git a/cache_utils/results-2020-04-17/analyse.sh b/cache_utils/results-2020-04-17/analyse.sh
new file mode 100755
index 0000000..f780115
--- /dev/null
+++ b/cache_utils/results-2020-04-17/analyse.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+awk '/^Iteration [[:digit:]]*[.]*/' < log.txt > iterations.txt
+rm -f results.csv
+awk -f `dirname $0`/analyse_iterations.awk < iterations.txt # This uses system to split off awk scripts doing the analysis
+grep -v -e "0,0$" results.csv > results_lite.csv
+#paste -d"," *.csv > combined.csv
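The analyse.sh script above, together with the analyse_iterations.awk script below, turns the calibration log into a CSV: the "Iteration N..." marker lines are paired up, the "RESULT:"-prefixed lines logged between each pair are extracted, and the pair index is prepended as a "core" column; grep -v "0,0$" then drops the all-zero buckets into results_lite.csv. For reference, a rough Python equivalent of the extraction step (a sketch only; extract_results is a hypothetical helper, not part of this patch):

    # Pairs consecutive "Iteration ..." marker lines, pulls the "RESULT:" CSV
    # rows logged between each pair, and prefixes the pair index as a "core"
    # column, mirroring what analyse_iterations.awk writes into results.csv.
    import re

    def extract_results(log_path="log.txt", out_path="results.csv"):
        lines = open(log_path).read().splitlines()
        markers = [i for i, l in enumerate(lines)
                   if re.match(r"^Iteration \d+", l)]
        with open(out_path, "w") as out:
            for core, (start, end) in enumerate(zip(markers[0::2],
                                                    markers[1::2])):
                rows = [l[len("RESULT:"):] for l in lines[start:end + 1]
                        if l.startswith("RESULT:")]
                if core == 0 and rows:
                    out.write("core," + rows[0] + "\n")  # global header, once
                for row in rows[1:]:  # rows[0] is the per-core header, skip it
                    out.write("{},{}\n".format(core, row))

    extract_results()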
diff --git a/cache_utils/results-2020-04-17/analyse_csv.py b/cache_utils/results-2020-04-17/analyse_csv.py
new file mode 100644
index 0000000..83d6d30
--- /dev/null
+++ b/cache_utils/results-2020-04-17/analyse_csv.py
@@ -0,0 +1,63 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sys import exit
+
+columns = ["Addr", "Hash"]
+core_number = 8 # FIXME
+for i in range(0, core_number):
+    for stat in ["Min", "Med", "Max"]:
+        for op in ["Hit", "Miss"]:
+            columns.append(op + str(i) + stat)
+columns.append("Hmm")
+df = pd.read_csv("./results_lite.csv")
+print(df.head())
+
+
+g = sns.FacetGrid(df, col="core", row="hash", legend_out=True)
+
+
+def custom_hist(x,y, **kwargs):
+    sns.distplot(x, range(100,150), hist_kws={"weights": y, "histtype":"step"}, kde=False, **kwargs)
+
+g.map(custom_hist, "time", "clflush_hit")
+# g.map(sns.distplot, "time", hist_kws={"weights": df["clflush_hit"]}, kde=False)
+plt.show()
+
+# test = pd.DataFrame({"value" : [0, 5], "weight": [5, 1]})
+# plt.figure()
+# sns.distplot(test["value"], hist_kws={"weights": test["weight"]}, kde=False)
+
+exit(0)
+
+selected_columns = columns[:-1]
+df = df[selected_columns]
+print(df.head())
+
+median_columns = list(filter(lambda s: s.endswith("Med"), columns))
+
+median_hits_col = list(filter(lambda s: s.startswith("Hit"), median_columns))
+median_miss_col = list(filter(lambda s: s.startswith("Miss"), median_columns))
+
+print(list(median_columns))
+print(list(median_hits_col), list(median_miss_col))
+
+hashes = df["Hash"].drop_duplicates()
+print(hashes)
+
+# def distrib(x, y, **kwargs):
+#     sns.distplot()
+
+separate_core_df = df.melt(id_vars=["Addr", "Hash"], value_vars=median_hits_col)
+
+g = sns.FacetGrid(separate_core_df, row="variable")
+g.map(sns.distplot, "value")
+plt.figure()
+
+separate_core_df = df.melt(id_vars=["Addr", "Hash"], value_vars=median_miss_col)
+g = sns.FacetGrid(separate_core_df, row="variable")
+g.map(sns.distplot, "value", hist_kws={"range": (75, 115)})
+
+plt.show()
+
+# sns.distplot(df["values"], hist_kws={"weights": df["count"]})
diff --git a/cache_utils/results-2020-04-17/analyse_iterations.awk b/cache_utils/results-2020-04-17/analyse_iterations.awk
new file mode 100644
index 0000000..496ca5d
--- /dev/null
+++ b/cache_utils/results-2020-04-17/analyse_iterations.awk
@@ -0,0 +1,16 @@
+BEGIN {
+    i = 0
+}
+{
+    start = $0
+    getline
+    end = $0
+    if (i == 0) {
+        # generate header
+        system("awk '$0 == \""start"\",$0 == \""end"\"' < log.txt | grep \"RESULT:\" | head -n 1 | cut -b 8- | awk '{print \"core,\" $0}'> results.csv")
+    }
+    cut = "cut -b 8- | tail -n +2"
+
+    system("awk '$0 == \""start"\",$0 == \""end"\"' < log.txt | grep \"RESULT:\" | " cut " | awk '{print \""i",\" $0}'>> results.csv")
+    i = i + 1
+}
diff --git a/cache_utils/results-2020-04-20/analyse.sh b/cache_utils/results-2020-04-20/analyse.sh
new file mode 100755
index 0000000..f780115
--- /dev/null
+++ b/cache_utils/results-2020-04-20/analyse.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+awk '/^Iteration [[:digit:]]*[.]*/' < log.txt > iterations.txt
+rm -f results.csv
+awk -f `dirname $0`/analyse_iterations.awk < iterations.txt # This uses system to split off awk scripts doing the analysis
+grep -v -e "0,0$" results.csv > results_lite.csv
+#paste -d"," *.csv > combined.csv
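Each results_lite.csv row is a pre-binned histogram bucket, a "time" value plus per-operation counts, so the analyse_csv.py scripts (2020-04-17 above, 2020-04-20 below) pass the counts to seaborn's distplot as histogram weights instead of replaying raw samples. The same trick in plain matplotlib, collapsed over cores and hashes purely to show the mechanism (bin range borrowed from the 2020-04-17 script):

    # Plot pre-binned (time, count) data by passing counts as histogram
    # weights; column names are those of results_lite.csv.
    import matplotlib.pyplot as plt
    import pandas as pd

    df = pd.read_csv("./results_lite.csv")
    plt.hist(df["time"], bins=range(100, 150), weights=df["clflush_hit"],
             histtype="step", label="clflush hit")
    plt.hist(df["time"], bins=range(100, 150), weights=df["clflush_miss"],
             histtype="step", color="r", label="clflush miss")
    plt.xlabel("cycles")
    plt.ylabel("count")
    plt.legend()
    plt.show()

The scripts themselves avoid this collapsing by keeping each (core, hash) pair in its own FacetGrid facet.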
diff --git a/cache_utils/results-2020-04-20/analyse_csv.py b/cache_utils/results-2020-04-20/analyse_csv.py
new file mode 100644
index 0000000..e9c4dfc
--- /dev/null
+++ b/cache_utils/results-2020-04-20/analyse_csv.py
@@ -0,0 +1,52 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sys import exit
+import wquantiles as wq
+
+df = pd.read_csv("./results_lite.csv")
+print(df.head())
+
+
+g = sns.FacetGrid(df, col="core", row="hash", legend_out=True)
+
+
+def custom_hist(x, y1, y2, **kwargs):
+    sns.distplot(x, range(200, 280), hist_kws={"weights": y1, "histtype":"step"}, kde=False, **kwargs)
+    kwargs["color"] = "r"
+    sns.distplot(x, range(200, 280), hist_kws={"weights": y2, "histtype":"step"}, kde=False, **kwargs)
+
+g.map(custom_hist, "time", "clflush_hit", "clflush_miss")
+# g.map(sns.distplot, "time", hist_kws={"weights": df["clflush_hit"]}, kde=False)
+
+plt.figure()
+
+def stat(x, key):
+    return wq.median(x["time"], x[key])
+
+
+miss = df.groupby(["core", "hash"]).apply(stat, "clflush_miss")
+stats = miss.reset_index()
+stats.columns = ["Core", "Hash", "Miss"]
+hit = df.groupby(["core", "hash"]).apply(stat, "clflush_hit")
+stats["Hit"] = hit.values
+
+
+print(stats.to_string())
+
+g = sns.FacetGrid(stats, row="Core")
+
+g.map(sns.distplot, 'Miss', bins=range(200, 280), color="r")
+g.map(sns.distplot, 'Hit', bins=range(200, 280))
+plt.show()
+
+#stats["clflush_miss_med"] = stats[[0]].apply(lambda x: x["miss_med"])
+#stats["clflush_hit_med"] = stats[[0]].apply(lambda x: x["hit_med"])
+#del df[[0]]
+#print(hit.to_string(), miss.to_string())
+
+# test = pd.DataFrame({"value" : [0, 5], "weight": [5, 1]})
+# plt.figure()
+# sns.distplot(test["value"], hist_kws={"weights": test["weight"]}, kde=False)
+
+exit(0)
diff --git a/cache_utils/results-2020-04-20/analyse_iterations.awk b/cache_utils/results-2020-04-20/analyse_iterations.awk
new file mode 100644
index 0000000..496ca5d
--- /dev/null
+++ b/cache_utils/results-2020-04-20/analyse_iterations.awk
@@ -0,0 +1,16 @@
+BEGIN {
+    i = 0
+}
+{
+    start = $0
+    getline
+    end = $0
+    if (i == 0) {
+        # generate header
+        system("awk '$0 == \""start"\",$0 == \""end"\"' < log.txt | grep \"RESULT:\" | head -n 1 | cut -b 8- | awk '{print \"core,\" $0}'> results.csv")
+    }
+    cut = "cut -b 8- | tail -n +2"
+
+    system("awk '$0 == \""start"\",$0 == \""end"\"' < log.txt | grep \"RESULT:\" | " cut " | awk '{print \""i",\" $0}'>> results.csv")
+    i = i + 1
+}
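Because the rows are buckets, a plain median over "time" would be meaningless; the 2020-04-20 analyse_csv.py above therefore computes weighted medians with wquantiles, grouped by (core, hash). A minimal self-contained sketch of that computation, on made-up counts:

    # Weighted median over histogram buckets, as wq.median is used in the
    # 2020-04-20 script. The counts below are hypothetical, for illustration.
    import pandas as pd
    import wquantiles as wq

    df = pd.DataFrame({
        "time":         [220, 225, 230, 235, 240],  # bucket values (cycles)
        "clflush_hit":  [0, 5, 10, 80, 5],          # hypothetical counts
        "clflush_miss": [10, 80, 10, 0, 0],
    })
    hit_med = wq.median(df["time"], df["clflush_hit"])
    miss_med = wq.median(df["time"], df["clflush_miss"])
    print(hit_med, miss_med)  # each lands near its weight peak (~235, ~225)

In the script this runs once per (core, hash) group via groupby().apply, producing the stats table that the final FacetGrid plots.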
diff --git a/cache_utils/src/calibration.rs b/cache_utils/src/calibration.rs
index c3590eb..c0e1c93 100644
--- a/cache_utils/src/calibration.rs
+++ b/cache_utils/src/calibration.rs
@@ -170,8 +170,16 @@
         cache_line_size,
         array.len() as isize,
         &[
-            (load_and_flush, "clflush hit"),
-            (flush_and_flush, "clflush miss"),
+            CalibrateOperation {
+                op: load_and_flush,
+                name: "clflush_hit",
+                display_name: "clflush hit",
+            },
+            CalibrateOperation {
+                op: flush_and_flush,
+                name: "clflush_miss",
+                display_name: "clflush miss",
+            },
         ],
         CFLUSH_BUCKET_NUMBER,
         CFLUSH_BUCKET_SIZE,
@@ -189,11 +197,17 @@ pub struct CalibrateResult {
     max: Vec<u64>,
 }
 
+pub struct CalibrateOperation<'a> {
+    pub op: unsafe fn(*const u8) -> u64,
+    pub name: &'a str,
+    pub display_name: &'a str,
+}
+
 pub unsafe fn calibrate(
     p: *const u8,
     increment: usize,
     len: isize,
-    operations: &[(unsafe fn(*const u8) -> u64, &str)],
+    operations: &[CalibrateOperation],
     buckets_num: usize,
     bucket_size: usize,
     num_iterations: u32,
@@ -216,7 +230,7 @@ fn calibrate_impl(
     p: *const u8,
     increment: usize,
     len: isize,
-    operations: &[(unsafe fn(*const u8) -> u64, &str)],
+    operations: &[CalibrateOperation],
     buckets_num: usize,
     bucket_size: usize,
     num_iterations: u32,
@@ -235,7 +249,10 @@ fn calibrate_impl(
     if verbosity_level >= Thresholds {
         println!(
             "Calibrating {}...",
-            operations.iter().map(|(_, name)| { name }).format(", ")
+            operations
+                .iter()
+                .map(|operation| { operation.display_name })
+                .format(", ")
         );
     }
 
@@ -245,11 +262,30 @@
     if verbosity_level >= Thresholds {
         println!(
             "CSV: address, hash, {} min, {} median, {} max",
-            operations.iter().map(|(_, name)| name).format(" min, "),
-            operations.iter().map(|(_, name)| name).format(" median, "),
-            operations.iter().map(|(_, name)| name).format(" max, ")
+            operations
+                .iter()
+                .map(|operation| operation.name)
+                .format(" min, "),
+            operations
+                .iter()
+                .map(|operation| operation.name)
+                .format(" median, "),
+            operations
+                .iter()
+                .map(|operation| operation.name)
+                .format(" max, ")
         );
     }
+    if verbosity_level >= RawResult {
+        println!(
+            "RESULT:address,hash,time,{}",
+            operations
+                .iter()
+                .map(|operation| operation.name)
+                .format(",")
+        );
+    }
+
     for i in (0..len).step_by(increment) {
         let pointer = unsafe { p.offset(i) };
         let hash = hasher.hash(pointer as usize);
@@ -271,7 +307,7 @@ fn calibrate_impl(
         for op in operations {
             let mut hist = vec![0; buckets_num];
             for _ in 0..num_iterations {
-                let time = unsafe { op.0(pointer) };
+                let time = unsafe { (op.op)(pointer) };
                 let bucket = min(buckets_num - 1, to_bucket(time));
                 hist[bucket] += 1;
             }
@@ -286,16 +322,9 @@
             .map(|h| (num_iterations - h[buckets_num - 1]) / 2)
             .collect();
 
-        if verbosity_level >= RawResult {
-            println!(
-                "time {}",
-                operations.iter().map(|(_, name)| name).format(" ")
-            );
-        }
-
         for j in 0..buckets_num - 1 {
             if verbosity_level >= RawResult {
-                print!("{:3}:", from_bucket(j));
+                print!("RESULT:{:p},{:x},{}", pointer, hash, from_bucket(j));
             }
             // ignore the last bucket : spurious context switches etc.
             for op in 0..operations.len() {
@@ -305,7 +334,7 @@
                 let med = &mut calibrate_result.median[op];
                 let sum = &mut sums[op];
                 if verbosity_level >= RawResult {
-                    print!("{:10}", hist);
+                    print!(",{}", hist);
                 }
 
                 if *min == 0 {
@@ -329,10 +358,10 @@
         }
     }
     if verbosity_level >= Thresholds {
-        for (j, (_, op)) in operations.iter().enumerate() {
+        for (j, op) in operations.iter().enumerate() {
             println!(
                 "{}: min {}, median {}, max {}",
-                op,
+                op.display_name,
                 calibrate_result.min[j],
                 calibrate_result.median[j],
                 calibrate_result.max[j]
@@ -367,7 +396,11 @@ pub fn calibrate_L3_miss_hit(
         pointer,
         cache_line_size,
         array.len() as isize,
-        &[(l3_and_reload, "L3 hit")],
+        &[CalibrateOperation {
+            op: l3_and_reload,
+            name: "l3_hit",
+            display_name: "L3 hit",
+        }],
         512,
         2,
         1 << 11,
diff --git a/cache_utils/src/lib.rs b/cache_utils/src/lib.rs
index 4532fea..a48a6e4 100644
--- a/cache_utils/src/lib.rs
+++ b/cache_utils/src/lib.rs
@@ -1,5 +1,6 @@
 #![cfg_attr(feature = "no_std", no_std)]
 #![feature(ptr_internals)]
+#![allow(clippy::missing_safety_doc)]
 
 use static_assertions::assert_cfg;
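With the CalibrateOperation change above, RawResult output becomes machine-readable: one "RESULT:address,hash,time,<operation names>" header plus one "RESULT:" row per histogram bucket, which is exactly what the awk pipeline greps for. A sketch of reading those lines straight into pandas, bypassing the shell pipeline (it assumes only the format visible in the println! calls above):

    # Collect RESULT: lines from the log; the first is the column header,
    # repeated headers (one per calibrate_impl run) are dropped.
    import io
    import pandas as pd

    def read_results(log_path="log.txt"):
        with open(log_path) as f:
            csv_lines = [l[len("RESULT:"):] for l in f
                         if l.startswith("RESULT:")]
        header, body = csv_lines[0], csv_lines[1:]
        body = [l for l in body if not l.startswith("address")]
        return pd.read_csv(io.StringIO("".join([header] + body)))

    print(read_results().head())

Unlike analyse_iterations.awk, this flat read does not recover the per-core split; see the sketch after the 2020-04-17 analyse.sh above for that.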
diff --git a/cache_utils/src/main.rs b/cache_utils/src/main.rs
index 6781bea..c935e5e 100644
--- a/cache_utils/src/main.rs
+++ b/cache_utils/src/main.rs
@@ -41,6 +41,31 @@ pub fn main() {
     // Let's grab all the list of CPUS
     // Then iterate the calibration on each CPU core.
 
+    eprint!("Warming up...");
+    for i in 0..(CpuSet::count() - 1) {
+        if old.is_set(i).unwrap() {
+            //println!("Iteration {}...", i);
+            let mut core = CpuSet::new();
+            core.set(i).unwrap();
+
+            match sched_setaffinity(Pid::from_raw(0), &core) {
+                Ok(()) => {
+                    calibrate_flush(array, 64, Verbosity::NoOutput);
+                    sched_setaffinity(Pid::from_raw(0), &old).unwrap();
+                    //println!("Iteration {}...ok ", i);
+                    eprint!(" {}", i);
+                }
+                Err(Sys(Errno::EINVAL)) => {
+                    //println!("skipping");
+                    continue;
+                }
+                Err(e) => {
+                    panic!("Unexpected error while setting affinity: {}", e);
+                }
+            }
+        }
+    }
+    eprintln!();
     for i in 0..(CpuSet::count() - 1) {
         if old.is_set(i).unwrap() {
             println!("Iteration {}...", i);
@@ -50,7 +75,7 @@ pub fn main() {
             match sched_setaffinity(Pid::from_raw(0), &core) {
                 Ok(()) => {
                     calibrate_flush(array, 64, Verbosity::NoOutput);
-                    calibrate_flush(array, 64, Verbosity::Thresholds);
+                    calibrate_flush(array, 64, Verbosity::RawResult);
                     sched_setaffinity(Pid::from_raw(0), &old).unwrap();
                     println!("Iteration {}...ok ", i);
                     eprintln!("Iteration {}...ok ", i);
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..1ac0819
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+pandas==1.0.3
+seaborn==0.10.0
+wquantiles==0.5
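Taken together, the pieces above suggest the following workflow: run the calibration binary with stdout captured to log.txt (progress messages go to stderr, so the terminal stays readable), run analyse.sh in the same directory to distill results_lite.csv, then run the matching analyse_csv.py. A sketch of that orchestration; the binary path is an assumption, since the patch does not show how the binary is launched:

    # End-to-end pipeline sketch, run from the repository root. Assumes the
    # cache_utils binary has been built and is invocable as "./cache_utils"
    # (adapt to your build setup) and that the 2020-04-20 scripts are used.
    import subprocess

    with open("log.txt", "w") as log:
        subprocess.run(["./cache_utils"], stdout=log, check=True)
    subprocess.run(["sh", "cache_utils/results-2020-04-20/analyse.sh"],
                   check=True)
    subprocess.run(["python", "cache_utils/results-2020-04-20/analyse_csv.py"],
                   check=True)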