General refactor of the calibration implementation when adding l3 hit calibration
This moves most of the logic into a calibrate function taking as a parameter a slice of operations to calibrate. L3 hit is measured by a flush followed by a prefetch (`_MM_HINT_T2` in the code), cpuid serialization, then a timed access.
This commit is contained in:
parent
78ed3bafad
commit
65f94dcb67
16
Cargo.lock
generated
16
Cargo.lock
generated
@ -32,6 +32,7 @@ dependencies = [
|
||||
name = "cache_utils"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"itertools 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"nix 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"polling_serial 0.1.0",
|
||||
"static_assertions 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -64,6 +65,19 @@ dependencies = [
|
||||
"x86_64 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
@ -173,6 +187,8 @@ dependencies = [
|
||||
"checksum bootloader 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "47a31d0af1b59774ea1470bf40b4bf9fed0bbead2f2d9d614c4c2e13382414dd"
|
||||
"checksum cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)" = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd"
|
||||
"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
|
||||
"checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
|
||||
"checksum itertools 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b"
|
||||
"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
"checksum libc 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)" = "dea0c0405123bba743ee3f91f49b1c7cfb684eef0da0a50110f758ccf24cdff0"
|
||||
"checksum linked_list_allocator 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5825aea823c659d0fdcdbe8c9b78baf56f3a10365d783db874f6d360df72626f"
|
||||
|
@ -11,11 +11,13 @@ polling_serial = { path = "../polling_serial", optional = true }
|
||||
vga_buffer = { path = "../vga_buffer", optional = true }
|
||||
x86_64 = "0.9.2"
|
||||
static_assertions = "1.1.0"
|
||||
itertools = { version = "0.9.0", default-features = false }
|
||||
|
||||
nix = { version = "0.17.0", optional = true }
|
||||
|
||||
|
||||
[features]
|
||||
std = ["nix"]
|
||||
std = ["nix", "itertools/use_std"]
|
||||
no_std = ["polling_serial", "vga_buffer"]
|
||||
|
||||
default = ["std"]
|
||||
|
@ -1,5 +1,6 @@
|
||||
use crate::{flush, maccess, rdtsc_fence};
|
||||
|
||||
use core::arch::x86_64 as arch_x86;
|
||||
#[cfg(feature = "no_std")]
|
||||
use polling_serial::serial_println as println;
|
||||
|
||||
@ -16,10 +17,7 @@ use crate::calibration::Verbosity::{Debug, NoOutput, RawResult, Thresholds};
|
||||
use alloc::vec;
|
||||
use alloc::vec::Vec;
|
||||
use core::cmp::min;
|
||||
|
||||
// calibration, todo
|
||||
// this will require getting a nice page to do some amusing stuff on it.
|
||||
// it will have to return some results later.
|
||||
use itertools::Itertools;
|
||||
|
||||
pub unsafe fn only_reload(p: *const u8) -> u64 {
|
||||
let t = rdtsc_fence();
|
||||
@ -53,6 +51,16 @@ pub unsafe fn only_flush(p: *const u8) -> u64 {
|
||||
rdtsc_fence() - t
|
||||
}
|
||||
|
||||
/// Times a single access to `p` after flushing it and then prefetching it back.
///
/// The sequence is: `flush(p)`, a memory fence, a prefetch of `p` with the
/// `_MM_HINT_T2` hint, a `cpuid` instruction, and finally one `maccess(p)`
/// bracketed by two `rdtsc_fence()` reads. The returned value is the
/// difference between those two reads.
///
/// According to the commit message this sequence is meant to measure an L3 hit.
///
/// # Safety
///
/// `p` is handed to `flush`, `_mm_prefetch` and `maccess`.
/// NOTE(review): presumably `p` must be valid for reads — confirm against
/// the contracts of `flush` and `maccess`.
pub unsafe fn l3_and_reload(p: *const u8) -> u64 {
|
||||
flush(p);
|
||||
// Fence between the flush and the prefetch that follows.
arch_x86::_mm_mfence();
|
||||
arch_x86::_mm_prefetch(p as *const i8, arch_x86::_MM_HINT_T2);
|
||||
// `cpuid` (leaf 0, sub-leaf 0); the commit message describes this step as
// "cpuid serialization" ahead of the timed access. The result is discarded.
arch_x86::__cpuid_count(0, 0);
|
||||
let t = rdtsc_fence();
|
||||
maccess(p);
|
||||
rdtsc_fence() - t
|
||||
}
|
||||
|
||||
const BUCKET_SIZE: usize = 5;
|
||||
const BUCKET_NUMBER: usize = 250;
|
||||
|
||||
@ -141,7 +149,7 @@ pub fn calibrate_access(array: &[u8; 4096]) -> u64 {
|
||||
const CFLUSH_BUCKET_SIZE: usize = 1;
|
||||
const CFLUSH_BUCKET_NUMBER: usize = 500;
|
||||
|
||||
const CFLUSH_NUM_ITER: usize = 1 << 11;
|
||||
const CFLUSH_NUM_ITER: u32 = 1 << 11;
|
||||
const CFLUSH_SPURIOUS_THRESHOLD: usize = 1;
|
||||
|
||||
/* TODO Code cleanup :
|
||||
@ -156,149 +164,209 @@ pub fn calibrate_flush(
|
||||
array: &[u8],
|
||||
cache_line_size: usize,
|
||||
verbose_level: Verbosity,
|
||||
) -> Vec<(usize, Vec<(usize, usize)>, usize)> {
|
||||
if verbose_level > NoOutput {
|
||||
println!("Calibrating cflush...");
|
||||
}
|
||||
let mut ret = Vec::new();
|
||||
// Allocate a target array
|
||||
// TBD why size, why the position in the array, why the type (usize)
|
||||
//let mut array = Vec::<usize>::with_capacity(5 << 10);
|
||||
//array.resize(5 << 10, 1);
|
||||
|
||||
//let array = array.into_boxed_slice();
|
||||
|
||||
// Histograms bucket of 5 and max at 400 cycles
|
||||
// Magic numbers to be justified
|
||||
// 80 is a size of screen
|
||||
|
||||
// the address in memory we are going to target
|
||||
) -> Vec<CalibrateResult> {
|
||||
let pointer = (&array[0]) as *const u8;
|
||||
|
||||
if pointer as usize & (cache_line_size - 1) != 0 {
|
||||
panic!("not aligned nicely");
|
||||
}
|
||||
// do a large sample of accesses to a cached line
|
||||
for i in (0..(array.len() as isize)).step_by(cache_line_size) {
|
||||
let mut hit_histogram = vec![0; CFLUSH_BUCKET_NUMBER];
|
||||
|
||||
let mut miss_histogram = hit_histogram.clone();
|
||||
if verbose_level >= Thresholds {
|
||||
println!("Calibration for {:p}", unsafe { pointer.offset(i) });
|
||||
}
|
||||
unsafe { load_and_flush(pointer.offset(i)) }; // align down on 64 bytes
|
||||
for _ in 1..CFLUSH_NUM_ITER {
|
||||
let d = unsafe { load_and_flush(pointer.offset(i)) } as usize;
|
||||
hit_histogram[min(CFLUSH_BUCKET_NUMBER - 1, d / CFLUSH_BUCKET_SIZE) as usize] += 1;
|
||||
}
|
||||
calibrate_impl(
|
||||
pointer,
|
||||
cache_line_size,
|
||||
array.len() as isize,
|
||||
&[
|
||||
(load_and_flush, "clflush hit"),
|
||||
(flush_and_flush, "clflush miss"),
|
||||
],
|
||||
CFLUSH_BUCKET_NUMBER,
|
||||
CFLUSH_BUCKET_SIZE,
|
||||
CFLUSH_NUM_ITER,
|
||||
verbose_level,
|
||||
)
|
||||
}
|
||||
|
||||
// do a large numer of accesses to uncached line
|
||||
unsafe { flush(pointer.offset(i)) };
|
||||
/// Measurements collected by `calibrate_impl` for one address.
///
/// Every `Vec` field has one entry per calibrated operation, in the same
/// order as the `operations` slice passed to the calibration function.
#[derive(Debug)]
|
||||
pub struct CalibrateResult {
|
||||
// Offset (as passed to `p.offset`) of the measured address from the base pointer.
offset: isize,
|
||||
// One histogram per operation: `histogram[op][bucket]` counts the iterations
// whose time fell in `bucket`; times past the end are clamped into the last bucket.
histogram: Vec<Vec<u32>>,
|
||||
// Per operation: lower bound of the first bucket at which the running count
// reaches half of the iterations that did not land in the last bucket.
median: Vec<u64>,
|
||||
// Per operation: lower bound of the first bucket holding more than
// `SPURIOUS_THRESHOLD` samples (0 also doubles as "not found yet").
min: Vec<u64>,
|
||||
// Per operation: lower bound of the last bucket after `min` holding more than
// `SPURIOUS_THRESHOLD` samples (the final bucket is never considered).
max: Vec<u64>,
|
||||
}
|
||||
|
||||
unsafe { load_and_flush(pointer.offset(i)) };
|
||||
for _ in 0..CFLUSH_NUM_ITER {
|
||||
let d = unsafe { flush_and_flush(pointer.offset(i)) } as usize;
|
||||
miss_histogram[min(CFLUSH_BUCKET_NUMBER - 1, d / CFLUSH_BUCKET_SIZE) as usize] += 1;
|
||||
}
|
||||
/// Public entry point to the generic calibration routine.
///
/// Forwards every argument unchanged to `calibrate_impl` and returns its
/// result: one `CalibrateResult` per visited offset.
///
/// * `p` - base pointer of the region to calibrate.
/// * `increment` - step between two measured offsets (used with `step_by`).
/// * `len` - exclusive upper bound of the offsets visited, starting at 0.
/// * `operations` - timed operations to run on each address, each paired with
///   a display name used in the printed output.
/// * `buckets_num` - number of histogram buckets per operation.
/// * `bucket_size` - width of one bucket; a time `t` goes in bucket `t / bucket_size`.
/// * `num_iterations` - number of samples taken per operation and per address.
/// * `verbosity_level` - controls how much is printed.
///
/// # Safety
///
/// `calibrate_impl` computes `p.offset(i)` for every `i` in
/// `(0..len).step_by(increment)` and passes the result to each operation, so
/// all of those addresses must be valid for the supplied operations.
pub unsafe fn calibrate(
|
||||
p: *const u8,
|
||||
increment: usize,
|
||||
len: isize,
|
||||
operations: &[(unsafe fn(*const u8) -> u64, &str)],
|
||||
buckets_num: usize,
|
||||
bucket_size: usize,
|
||||
num_iterations: u32,
|
||||
verbosity_level: Verbosity,
|
||||
) -> Vec<CalibrateResult> {
|
||||
calibrate_impl(
|
||||
p,
|
||||
|
||||
increment,
|
||||
len,
|
||||
operations,
|
||||
buckets_num,
|
||||
bucket_size,
|
||||
num_iterations,
|
||||
verbosity_level,
|
||||
)
|
||||
}
|
||||
|
||||
// extract min, max, & median of the distribution.
|
||||
// set the threshold to mid point between miss max & hit min.
|
||||
|
||||
// determine :
|
||||
// Hit min, max, median
|
||||
// Miss min, miss max, median
|
||||
// If there is no overlap the threshold is trivial
|
||||
// If there is Grab the point where the ratio is balanced
|
||||
|
||||
let mut hit_min = 0;
|
||||
let mut hit_max = 0;
|
||||
let mut miss_min = 0;
|
||||
let mut miss_max = 0;
|
||||
let mut miss_med = 0;
|
||||
let mut hit_med = 0;
|
||||
let mut hit_sum = 0;
|
||||
let mut miss_sum = 0;
|
||||
|
||||
//let mut hit_max: (usize, u32) = (0, 0);
|
||||
//let mut miss_max: (usize, u32) = (0, 0);
|
||||
|
||||
for i in 0..(hit_histogram.len() - 1) {
|
||||
// ignore the last bucket, spurious context switches
|
||||
if verbose_level >= RawResult {
|
||||
const SPURIOUS_THRESHOLD: u32 = 1;
|
||||
fn calibrate_impl(
|
||||
p: *const u8,
|
||||
increment: usize,
|
||||
len: isize,
|
||||
operations: &[(unsafe fn(*const u8) -> u64, &str)],
|
||||
buckets_num: usize,
|
||||
bucket_size: usize,
|
||||
num_iterations: u32,
|
||||
verbosity_level: Verbosity,
|
||||
) -> Vec<CalibrateResult> {
|
||||
if verbosity_level >= Thresholds {
|
||||
println!(
|
||||
"{:3}: {:10} {:10}",
|
||||
i * CFLUSH_BUCKET_SIZE,
|
||||
hit_histogram[i],
|
||||
miss_histogram[i]
|
||||
"Calibrating {}...",
|
||||
operations.iter().map(|(_, name)| { name }).format(", ")
|
||||
);
|
||||
}
|
||||
|
||||
for (min, max, med, sum, hist) in &mut [
|
||||
(
|
||||
&mut hit_min,
|
||||
&mut hit_max,
|
||||
&mut hit_med,
|
||||
&mut hit_sum,
|
||||
&hit_histogram,
|
||||
),
|
||||
(
|
||||
&mut miss_min,
|
||||
&mut miss_max,
|
||||
&mut miss_med,
|
||||
&mut miss_sum,
|
||||
&miss_histogram,
|
||||
),
|
||||
] {
|
||||
if **min == 0 {
|
||||
// looking for min
|
||||
if hist[i] > CFLUSH_SPURIOUS_THRESHOLD {
|
||||
**min = i;
|
||||
}
|
||||
} else {
|
||||
// min found, looking for max
|
||||
if hist[i] > CFLUSH_SPURIOUS_THRESHOLD {
|
||||
**max = i;
|
||||
let to_bucket = |time: u64| -> usize { time as usize / bucket_size };
|
||||
let from_bucket = |bucket: usize| -> u64 { (bucket * bucket_size) as u64 };
|
||||
let mut ret = Vec::new();
|
||||
if verbosity_level >= Thresholds {
|
||||
println!(
|
||||
"CSV: address, {} min, {} median, {} max",
|
||||
operations.iter().map(|(_, name)| name).format(" min, "),
|
||||
operations.iter().map(|(_, name)| name).format(" median, "),
|
||||
operations.iter().map(|(_, name)| name).format(" max, ")
|
||||
);
|
||||
}
|
||||
for i in (0..len).step_by(increment) {
|
||||
let pointer = unsafe { p.offset(i) };
|
||||
|
||||
if verbosity_level >= Thresholds {
|
||||
println!("Calibration for {:p}", pointer);
|
||||
}
|
||||
|
||||
if **med == 0 {
|
||||
**sum += hist[i];
|
||||
if **sum >= (CFLUSH_NUM_ITER - hist[hist.len() - 1]) / 2 {
|
||||
**med = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
if verbose_level >= Debug {
|
||||
println!("sum hit {} miss {}", hit_sum, miss_sum);
|
||||
// TODO add some useful impl to CalibrateResults
|
||||
let mut calibrate_result = CalibrateResult {
|
||||
offset: i,
|
||||
histogram: Vec::new(),
|
||||
median: vec![0; operations.len()],
|
||||
min: vec![0; operations.len()],
|
||||
max: vec![0; operations.len()],
|
||||
};
|
||||
calibrate_result.histogram.reserve(operations.len());
|
||||
|
||||
for op in operations {
|
||||
let mut hist = vec![0; buckets_num];
|
||||
for _ in 0..num_iterations {
|
||||
let time = unsafe { op.0(pointer) };
|
||||
let bucket = min(buckets_num - 1, to_bucket(time));
|
||||
hist[bucket] += 1;
|
||||
}
|
||||
calibrate_result.histogram.push(hist);
|
||||
}
|
||||
|
||||
if verbose_level >= Thresholds {
|
||||
println!("Hits: min {} max {} med {}", hit_min, hit_max, hit_med);
|
||||
println!("Miss: min {} max {} med {}", miss_min, miss_max, miss_med);
|
||||
}
|
||||
//println!("Miss max {}", miss_max.0 * CFLUSH_BUCKET_SIZE);
|
||||
//println!("Max hit {}", hit_max.0 * CFLUSH_BUCKET_SIZE);
|
||||
let mut threshold: (usize, u32) = (0, u32::max_value());
|
||||
/*for i in miss_max.0..hit_max.0 {
|
||||
if hit_histogram[i] + miss_histogram[i] < threshold.1 {
|
||||
threshold = (i, hit_histogram[i] + miss_histogram[i]);
|
||||
}
|
||||
}*/
|
||||
if verbose_level > NoOutput {
|
||||
println!("Threshold {}", threshold.0 * CFLUSH_BUCKET_SIZE);
|
||||
println!("Calibration done.");
|
||||
}
|
||||
let mut sums = vec![0; operations.len()];
|
||||
|
||||
ret.push((
|
||||
i as usize,
|
||||
hit_histogram
|
||||
let median_thresholds: Vec<u32> = calibrate_result
|
||||
.histogram
|
||||
.iter()
|
||||
.zip(&miss_histogram)
|
||||
.map(|(&x, &y)| (x, y))
|
||||
.collect(),
|
||||
threshold.0,
|
||||
));
|
||||
.map(|h| (num_iterations - h[buckets_num - 1]) / 2)
|
||||
.collect();
|
||||
|
||||
if verbosity_level >= RawResult {
|
||||
println!(
|
||||
"time {}",
|
||||
operations.iter().map(|(_, name)| name).format(" ")
|
||||
);
|
||||
}
|
||||
|
||||
for j in 0..buckets_num - 1 {
|
||||
if verbosity_level >= RawResult {
|
||||
print!("{:3}:", from_bucket(j));
|
||||
}
|
||||
// ignore the last bucket : spurious context switches etc.
|
||||
for op in 0..operations.len() {
|
||||
let hist = &calibrate_result.histogram[op][j];
|
||||
let min = &mut calibrate_result.min[op];
|
||||
let max = &mut calibrate_result.max[op];
|
||||
let med = &mut calibrate_result.median[op];
|
||||
let sum = &mut sums[op];
|
||||
if verbosity_level >= RawResult {
|
||||
print!("{:10}", hist);
|
||||
}
|
||||
|
||||
if *min == 0 {
|
||||
// looking for min
|
||||
if *hist > SPURIOUS_THRESHOLD {
|
||||
*min = from_bucket(j);
|
||||
}
|
||||
} else if *hist > SPURIOUS_THRESHOLD {
|
||||
*max = from_bucket(j);
|
||||
}
|
||||
|
||||
if *med == 0 {
|
||||
*sum += *hist;
|
||||
if *sum >= median_thresholds[op] {
|
||||
*med = from_bucket(j);
|
||||
}
|
||||
}
|
||||
}
|
||||
if verbosity_level >= RawResult {
|
||||
println!();
|
||||
}
|
||||
}
|
||||
if verbosity_level >= Thresholds {
|
||||
for (j, (_, op)) in operations.iter().enumerate() {
|
||||
println!(
|
||||
"{}: min {}, median {}, max {}",
|
||||
op,
|
||||
calibrate_result.min[j],
|
||||
calibrate_result.median[j],
|
||||
calibrate_result.max[j]
|
||||
);
|
||||
}
|
||||
println!(
|
||||
"CSV: {:p}, {}, {}, {}",
|
||||
pointer,
|
||||
calibrate_result.min.iter().format(", "),
|
||||
calibrate_result.median.iter().format(", "),
|
||||
calibrate_result.max.iter().format(", ")
|
||||
);
|
||||
}
|
||||
ret.push(calibrate_result);
|
||||
}
|
||||
ret
|
||||
}
|
||||
|
||||
/// Calibrates the `l3_and_reload` timing over `array`.
///
/// Runs `calibrate_impl` starting at `&array[0]`, stepping by
/// `cache_line_size` over `array.len()` bytes, with a single operation
/// (`l3_and_reload`, labelled "L3 hit"), 512 buckets of width 2 and
/// `1 << 11` iterations per address.
///
/// Although every offset is measured, only the first `CalibrateResult`
/// (offset 0) is returned. Despite the function name, only the "L3 hit"
/// operation is measured here.
///
/// # Panics
///
/// Panics if `array` is empty (`array[0]` is indexed), or if
/// `cache_line_size` is 0 (it is used as a `step_by` step).
// `non_snake_case` is allowed so the name can keep the upper-case "L3".
#[allow(non_snake_case)]
|
||||
pub fn calibrate_L3_miss_hit(
|
||||
array: &[u8],
|
||||
cache_line_size: usize,
|
||||
verbose_level: Verbosity,
|
||||
) -> CalibrateResult {
|
||||
if verbose_level > NoOutput {
|
||||
println!("Calibrating L3 access...");
|
||||
}
|
||||
let pointer = (&array[0]) as *const u8;
|
||||
|
||||
let r = calibrate_impl(
|
||||
pointer,
|
||||
cache_line_size,
|
||||
array.len() as isize,
|
||||
&[(l3_and_reload, "L3 hit")],
|
||||
// 512 buckets
512,
|
||||
// bucket width of 2
2,
|
||||
// iterations per address
1 << 11,
|
||||
verbose_level,
|
||||
);
|
||||
|
||||
// Keep only the result for the first address (offset 0).
r.into_iter().next().unwrap()
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user