General refactor of the calibration implementation when adding l3 hit calibration

This moves most of the logic into a calibrate function taking as a parameter a slice of operations to calibrate
L3 hit is measured by flush followed by a prefetch (the code uses the `_MM_HINT_T2` hint), cpuid serialization, timed access
This commit is contained in:
guillaume didier 2020-04-01 16:12:15 +02:00
parent 78ed3bafad
commit 65f94dcb67
3 changed files with 216 additions and 130 deletions

16
Cargo.lock generated
View File

@ -32,6 +32,7 @@ dependencies = [
name = "cache_utils"
version = "0.1.0"
dependencies = [
"itertools 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
"nix 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)",
"polling_serial 0.1.0",
"static_assertions 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -64,6 +65,19 @@ dependencies = [
"x86_64 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "either"
version = "1.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "itertools"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
@ -173,6 +187,8 @@ dependencies = [
"checksum bootloader 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "47a31d0af1b59774ea1470bf40b4bf9fed0bbead2f2d9d614c4c2e13382414dd"
"checksum cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)" = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd"
"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
"checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
"checksum itertools 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b"
"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
"checksum libc 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)" = "dea0c0405123bba743ee3f91f49b1c7cfb684eef0da0a50110f758ccf24cdff0"
"checksum linked_list_allocator 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5825aea823c659d0fdcdbe8c9b78baf56f3a10365d783db874f6d360df72626f"

View File

@ -11,11 +11,13 @@ polling_serial = { path = "../polling_serial", optional = true }
vga_buffer = { path = "../vga_buffer", optional = true }
x86_64 = "0.9.2"
static_assertions = "1.1.0"
itertools = { version = "0.9.0", default-features = false }
nix = { version = "0.17.0", optional = true }
[features]
std = ["nix"]
std = ["nix", "itertools/use_std"]
no_std = ["polling_serial", "vga_buffer"]
default = ["std"]

View File

@ -1,5 +1,6 @@
use crate::{flush, maccess, rdtsc_fence};
use core::arch::x86_64 as arch_x86;
#[cfg(feature = "no_std")]
use polling_serial::serial_println as println;
@ -16,10 +17,7 @@ use crate::calibration::Verbosity::{Debug, NoOutput, RawResult, Thresholds};
use alloc::vec;
use alloc::vec::Vec;
use core::cmp::min;
// calibration, todo
// this will require getting a nice page to do some amusing stuff on it.
// it will have to return some results later.
use itertools::Itertools;
pub unsafe fn only_reload(p: *const u8) -> u64 {
let t = rdtsc_fence();
@ -53,6 +51,16 @@ pub unsafe fn only_flush(p: *const u8) -> u64 {
rdtsc_fence() - t
}
/// Times one access to `p` after flushing the line and prefetching it back
/// with the `_MM_HINT_T2` hint (intended to measure an "L3 hit" access).
///
/// Sequence: `flush(p)`, memory fence, prefetch, `cpuid`, then a single
/// `maccess(p)` bracketed by two `rdtsc_fence()` reads.
///
/// Returns the difference between the two `rdtsc_fence()` readings
/// (presumably timestamp-counter cycles — confirm against `rdtsc_fence`).
///
/// # Safety
///
/// `p` is passed unchanged to `flush`, `_mm_prefetch` and `maccess`.
/// Presumably it must point to memory that is valid to read; verify against
/// the contracts of those helpers.
pub unsafe fn l3_and_reload(p: *const u8) -> u64 {
// Flush first so the prefetch below decides where the line ends up.
flush(p);
// Fence so the flush is ordered before the prefetch is issued.
arch_x86::_mm_mfence();
// NOTE(review): the T2 hint is expected to place the line in L3 rather than
// L1/L2; the exact placement is micro-architecture dependent — confirm.
arch_x86::_mm_prefetch(p as *const i8, arch_x86::_MM_HINT_T2);
// cpuid is executed as a serialization point before the timed section;
// its result is deliberately discarded.
arch_x86::__cpuid_count(0, 0);
// Timed section: exactly one access to `p`.
let t = rdtsc_fence();
maccess(p);
rdtsc_fence() - t
}
const BUCKET_SIZE: usize = 5;
const BUCKET_NUMBER: usize = 250;
@ -141,7 +149,7 @@ pub fn calibrate_access(array: &[u8; 4096]) -> u64 {
const CFLUSH_BUCKET_SIZE: usize = 1;
const CFLUSH_BUCKET_NUMBER: usize = 500;
const CFLUSH_NUM_ITER: usize = 1 << 11;
const CFLUSH_NUM_ITER: u32 = 1 << 11;
const CFLUSH_SPURIOUS_THRESHOLD: usize = 1;
/* TODO Code cleanup :
@ -156,149 +164,209 @@ pub fn calibrate_flush(
array: &[u8],
cache_line_size: usize,
verbose_level: Verbosity,
) -> Vec<(usize, Vec<(usize, usize)>, usize)> {
if verbose_level > NoOutput {
println!("Calibrating cflush...");
}
let mut ret = Vec::new();
// Allocate a target array
// TBD why size, why the position in the array, why the type (usize)
//let mut array = Vec::<usize>::with_capacity(5 << 10);
//array.resize(5 << 10, 1);
//let array = array.into_boxed_slice();
// Histograms bucket of 5 and max at 400 cycles
// Magic numbers to be justified
// 80 is a size of screen
// the address in memory we are going to target
) -> Vec<CalibrateResult> {
let pointer = (&array[0]) as *const u8;
if pointer as usize & (cache_line_size - 1) != 0 {
panic!("not aligned nicely");
}
// do a large sample of accesses to a cached line
for i in (0..(array.len() as isize)).step_by(cache_line_size) {
let mut hit_histogram = vec![0; CFLUSH_BUCKET_NUMBER];
let mut miss_histogram = hit_histogram.clone();
if verbose_level >= Thresholds {
println!("Calibration for {:p}", unsafe { pointer.offset(i) });
}
unsafe { load_and_flush(pointer.offset(i)) }; // align down on 64 bytes
for _ in 1..CFLUSH_NUM_ITER {
let d = unsafe { load_and_flush(pointer.offset(i)) } as usize;
hit_histogram[min(CFLUSH_BUCKET_NUMBER - 1, d / CFLUSH_BUCKET_SIZE) as usize] += 1;
}
calibrate_impl(
pointer,
cache_line_size,
array.len() as isize,
&[
(load_and_flush, "clflush hit"),
(flush_and_flush, "clflush miss"),
],
CFLUSH_BUCKET_NUMBER,
CFLUSH_BUCKET_SIZE,
CFLUSH_NUM_ITER,
verbose_level,
)
}
// do a large numer of accesses to uncached line
unsafe { flush(pointer.offset(i)) };
/// Calibration measurements for a single address, as produced by
/// `calibrate_impl`.
///
/// `histogram`, `median`, `min` and `max` each hold one entry per calibrated
/// operation, in the same order as the `operations` slice given to the
/// calibration function. The timing values are bucket lower bounds
/// (`bucket index * bucket_size`), not raw samples.
#[derive(Debug)]
pub struct CalibrateResult {
// Byte offset of the measured address from the base pointer.
offset: isize,
// Per-operation histogram: entry `b` counts the samples that fell in
// bucket `b`; samples past the range are clamped into the last bucket.
histogram: Vec<Vec<u32>>,
// Per-operation median estimate: first bucket at which the running sample
// count reaches half of the samples outside the last (overflow) bucket.
median: Vec<u64>,
// Per-operation lowest bucket whose count exceeds the spurious threshold;
// stays 0 when no such bucket was found.
min: Vec<u64>,
// Per-operation highest bucket (after `min`) whose count exceeds the
// spurious threshold; stays 0 when no such bucket was found.
max: Vec<u64>,
}
unsafe { load_and_flush(pointer.offset(i)) };
for _ in 0..CFLUSH_NUM_ITER {
let d = unsafe { flush_and_flush(pointer.offset(i)) } as usize;
miss_histogram[min(CFLUSH_BUCKET_NUMBER - 1, d / CFLUSH_BUCKET_SIZE) as usize] += 1;
}
/// Public entry point for calibrating arbitrary timed operations; forwards
/// every argument unchanged to `calibrate_impl`.
///
/// For each offset `i` in `(0..len).step_by(increment)` every operation is
/// invoked `num_iterations` times on `p.offset(i)` and the returned timings
/// are sorted into `buckets_num` buckets of width `bucket_size`.
///
/// * `p` - base pointer of the region to calibrate.
/// * `increment` - stride, in bytes, between calibrated addresses.
/// * `len` - length, in bytes, of the region starting at `p`.
/// * `operations` - `(timed function, display name)` pairs; the name is only
///   used in the printed output.
/// * `buckets_num` - number of histogram buckets; the last one collects all
///   out-of-range timings and is ignored for min / median / max.
/// * `bucket_size` - width of one bucket, in the unit returned by the
///   operations.
/// * `num_iterations` - number of samples per operation and address.
/// * `verbosity_level` - controls how much is printed while calibrating.
///
/// Returns one `CalibrateResult` per calibrated address, in address order.
///
/// # Panics
///
/// Panics if `increment` is 0 (`step_by` rejects a zero step). When at least
/// one sample is taken it also panics if `bucket_size` is 0 (division by
/// zero); `buckets_num == 0` makes `buckets_num - 1` underflow.
///
/// # Safety
///
/// `p.offset(i)` is computed for every `i` in `(0..len).step_by(increment)`
/// and handed to each operation, so all of those addresses must lie within
/// one allocation and satisfy whatever the individual operations require of
/// their pointer argument.
pub unsafe fn calibrate(
p: *const u8,
increment: usize,
len: isize,
operations: &[(unsafe fn(*const u8) -> u64, &str)],
buckets_num: usize,
bucket_size: usize,
num_iterations: u32,
verbosity_level: Verbosity,
) -> Vec<CalibrateResult> {
calibrate_impl(
p,
increment,
len,
operations,
buckets_num,
bucket_size,
num_iterations,
verbosity_level,
)
}
// extract min, max, & median of the distribution.
// set the threshold to mid point between miss max & hit min.
// determine :
// Hit min, max, median
// Miss min, miss max, median
// If there is no overlap the threshold is trivial
// If there is Grab the point where the ratio is balanced
let mut hit_min = 0;
let mut hit_max = 0;
let mut miss_min = 0;
let mut miss_max = 0;
let mut miss_med = 0;
let mut hit_med = 0;
let mut hit_sum = 0;
let mut miss_sum = 0;
//let mut hit_max: (usize, u32) = (0, 0);
//let mut miss_max: (usize, u32) = (0, 0);
for i in 0..(hit_histogram.len() - 1) {
// ignore the last bucket, spurious context switches
if verbose_level >= RawResult {
const SPURIOUS_THRESHOLD: u32 = 1;
fn calibrate_impl(
p: *const u8,
increment: usize,
len: isize,
operations: &[(unsafe fn(*const u8) -> u64, &str)],
buckets_num: usize,
bucket_size: usize,
num_iterations: u32,
verbosity_level: Verbosity,
) -> Vec<CalibrateResult> {
if verbosity_level >= Thresholds {
println!(
"{:3}: {:10} {:10}",
i * CFLUSH_BUCKET_SIZE,
hit_histogram[i],
miss_histogram[i]
"Calibrating {}...",
operations.iter().map(|(_, name)| { name }).format(", ")
);
}
for (min, max, med, sum, hist) in &mut [
(
&mut hit_min,
&mut hit_max,
&mut hit_med,
&mut hit_sum,
&hit_histogram,
),
(
&mut miss_min,
&mut miss_max,
&mut miss_med,
&mut miss_sum,
&miss_histogram,
),
] {
if **min == 0 {
// looking for min
if hist[i] > CFLUSH_SPURIOUS_THRESHOLD {
**min = i;
}
} else {
// min found, looking for max
if hist[i] > CFLUSH_SPURIOUS_THRESHOLD {
**max = i;
let to_bucket = |time: u64| -> usize { time as usize / bucket_size };
let from_bucket = |bucket: usize| -> u64 { (bucket * bucket_size) as u64 };
let mut ret = Vec::new();
if verbosity_level >= Thresholds {
println!(
"CSV: address, {} min, {} median, {} max",
operations.iter().map(|(_, name)| name).format(" min, "),
operations.iter().map(|(_, name)| name).format(" median, "),
operations.iter().map(|(_, name)| name).format(" max, ")
);
}
for i in (0..len).step_by(increment) {
let pointer = unsafe { p.offset(i) };
if verbosity_level >= Thresholds {
println!("Calibration for {:p}", pointer);
}
if **med == 0 {
**sum += hist[i];
if **sum >= (CFLUSH_NUM_ITER - hist[hist.len() - 1]) / 2 {
**med = i;
}
}
}
if verbose_level >= Debug {
println!("sum hit {} miss {}", hit_sum, miss_sum);
// TODO add some useful impl to CalibrateResults
let mut calibrate_result = CalibrateResult {
offset: i,
histogram: Vec::new(),
median: vec![0; operations.len()],
min: vec![0; operations.len()],
max: vec![0; operations.len()],
};
calibrate_result.histogram.reserve(operations.len());
for op in operations {
let mut hist = vec![0; buckets_num];
for _ in 0..num_iterations {
let time = unsafe { op.0(pointer) };
let bucket = min(buckets_num - 1, to_bucket(time));
hist[bucket] += 1;
}
calibrate_result.histogram.push(hist);
}
if verbose_level >= Thresholds {
println!("Hits: min {} max {} med {}", hit_min, hit_max, hit_med);
println!("Miss: min {} max {} med {}", miss_min, miss_max, miss_med);
}
//println!("Miss max {}", miss_max.0 * CFLUSH_BUCKET_SIZE);
//println!("Max hit {}", hit_max.0 * CFLUSH_BUCKET_SIZE);
let mut threshold: (usize, u32) = (0, u32::max_value());
/*for i in miss_max.0..hit_max.0 {
if hit_histogram[i] + miss_histogram[i] < threshold.1 {
threshold = (i, hit_histogram[i] + miss_histogram[i]);
}
}*/
if verbose_level > NoOutput {
println!("Threshold {}", threshold.0 * CFLUSH_BUCKET_SIZE);
println!("Calibration done.");
}
let mut sums = vec![0; operations.len()];
ret.push((
i as usize,
hit_histogram
let median_thresholds: Vec<u32> = calibrate_result
.histogram
.iter()
.zip(&miss_histogram)
.map(|(&x, &y)| (x, y))
.collect(),
threshold.0,
));
.map(|h| (num_iterations - h[buckets_num - 1]) / 2)
.collect();
if verbosity_level >= RawResult {
println!(
"time {}",
operations.iter().map(|(_, name)| name).format(" ")
);
}
for j in 0..buckets_num - 1 {
if verbosity_level >= RawResult {
print!("{:3}:", from_bucket(j));
}
// ignore the last bucket : spurious context switches etc.
for op in 0..operations.len() {
let hist = &calibrate_result.histogram[op][j];
let min = &mut calibrate_result.min[op];
let max = &mut calibrate_result.max[op];
let med = &mut calibrate_result.median[op];
let sum = &mut sums[op];
if verbosity_level >= RawResult {
print!("{:10}", hist);
}
if *min == 0 {
// looking for min
if *hist > SPURIOUS_THRESHOLD {
*min = from_bucket(j);
}
} else if *hist > SPURIOUS_THRESHOLD {
*max = from_bucket(j);
}
if *med == 0 {
*sum += *hist;
if *sum >= median_thresholds[op] {
*med = from_bucket(j);
}
}
}
if verbosity_level >= RawResult {
println!();
}
}
if verbosity_level >= Thresholds {
for (j, (_, op)) in operations.iter().enumerate() {
println!(
"{}: min {}, median {}, max {}",
op,
calibrate_result.min[j],
calibrate_result.median[j],
calibrate_result.max[j]
);
}
println!(
"CSV: {:p}, {}, {}, {}",
pointer,
calibrate_result.min.iter().format(", "),
calibrate_result.median.iter().format(", "),
calibrate_result.max.iter().format(", ")
);
}
ret.push(calibrate_result);
}
ret
}
/// Calibrates the latency of an "L3 hit" access over `array`.
///
/// `l3_and_reload` is sampled at every `cache_line_size`-byte step of
/// `array`, using 512 buckets of width 2 and `1 << 11` samples per address.
/// All addresses are measured (and reported according to `verbose_level`),
/// but only the result for the first address (offset 0) is returned.
///
/// # Panics
///
/// Panics if `array` is empty (indexing `array[0]`) or if `cache_line_size`
/// is 0 (`step_by` rejects a zero step).
// The capital "L3" is kept in the name because it is the usual spelling of the cache level.
#[allow(non_snake_case)]
pub fn calibrate_L3_miss_hit(
    array: &[u8],
    cache_line_size: usize,
    verbose_level: Verbosity,
) -> CalibrateResult {
    // Histogram shape and sample count used for this measurement.
    const L3_BUCKET_NUMBER: usize = 512;
    const L3_BUCKET_SIZE: usize = 2;
    const L3_NUM_ITER: u32 = 1 << 11;

    if verbose_level > NoOutput {
        println!("Calibrating L3 access...");
    }

    // Indexing (rather than `as_ptr`) keeps the panic on an empty slice.
    let base: *const u8 = &array[0];
    let span = array.len() as isize;
    let operations: [(unsafe fn(*const u8) -> u64, &str); 1] = [(l3_and_reload, "L3 hit")];

    let per_address = calibrate_impl(
        base,
        cache_line_size,
        span,
        &operations,
        L3_BUCKET_NUMBER,
        L3_BUCKET_SIZE,
        L3_NUM_ITER,
        verbose_level,
    );

    // Only the first address is reported back to the caller.
    let mut remaining = per_address.into_iter();
    remaining.next().unwrap()
}