Better Cflush calibration - only the per-cache-line threshold determination is still missing
This commit is contained in:
parent
b32ec3a68b
commit
cb9f598a17
@ -3,7 +3,16 @@ use crate::{flush, maccess, rdtsc_fence};
|
|||||||
#[cfg(feature = "no_std")]
|
#[cfg(feature = "no_std")]
|
||||||
use polling_serial::serial_println as println;
|
use polling_serial::serial_println as println;
|
||||||
|
|
||||||
|
#[derive(Ord, PartialOrd, Eq, PartialEq)]
|
||||||
|
pub enum Verbosity {
|
||||||
|
NoOutput,
|
||||||
|
Thresholds,
|
||||||
|
RawResult,
|
||||||
|
Debug,
|
||||||
|
}
|
||||||
|
|
||||||
extern crate alloc;
|
extern crate alloc;
|
||||||
|
use crate::calibration::Verbosity::{Debug, RawResult, Thresholds};
|
||||||
use alloc::vec;
|
use alloc::vec;
|
||||||
use alloc::vec::Vec;
|
use alloc::vec::Vec;
|
||||||
use core::cmp::min;
|
use core::cmp::min;
|
||||||
@ -132,6 +141,9 @@ pub fn calibrate_access(array: &[u8; 4096]) -> u64 {
|
|||||||
const CFLUSH_BUCKET_SIZE: usize = 1;
|
const CFLUSH_BUCKET_SIZE: usize = 1;
|
||||||
const CFLUSH_BUCKET_NUMBER: usize = 250;
|
const CFLUSH_BUCKET_NUMBER: usize = 250;
|
||||||
|
|
||||||
|
const CFLUSH_NUM_ITER: usize = 1 << 11;
|
||||||
|
const CFLUSH_SPURIOUS_THRESHOLD: usize = 1;
|
||||||
|
|
||||||
/* TODO Code cleanup :
|
/* TODO Code cleanup :
|
||||||
- change type back to a slice OK
|
- change type back to a slice OK
|
||||||
- change return type to return thresholds per cache line ?
|
- change return type to return thresholds per cache line ?
|
||||||
@ -140,9 +152,14 @@ const CFLUSH_BUCKET_NUMBER: usize = 250;
|
|||||||
- parametrize 4k vs 2M ? Or just use the slice length ? OK
|
- parametrize 4k vs 2M ? Or just use the slice length ? OK
|
||||||
*/
|
*/
|
||||||
|
|
||||||
pub fn calibrate_flush(array: &[u8], cache_line_size: usize) -> u64 {
|
pub fn calibrate_flush(
|
||||||
|
array: &[u8],
|
||||||
|
cache_line_size: usize,
|
||||||
|
verbose_level: Verbosity,
|
||||||
|
) -> Vec<(usize, Vec<(usize, usize)>, usize)> {
|
||||||
println!("Calibrating cflush...");
|
println!("Calibrating cflush...");
|
||||||
|
|
||||||
|
let mut ret = Vec::new();
|
||||||
// Allocate a target array
|
// Allocate a target array
|
||||||
// TBD why size, why the position in the array, why the type (usize)
|
// TBD why size, why the position in the array, why the type (usize)
|
||||||
//let mut array = Vec::<usize>::with_capacity(5 << 10);
|
//let mut array = Vec::<usize>::with_capacity(5 << 10);
|
||||||
@ -157,7 +174,7 @@ pub fn calibrate_flush(array: &[u8], cache_line_size: usize) -> u64 {
|
|||||||
// the address in memory we are going to target
|
// the address in memory we are going to target
|
||||||
let pointer = (&array[0]) as *const u8;
|
let pointer = (&array[0]) as *const u8;
|
||||||
|
|
||||||
if pointer as usize & 0x3f != 0 {
|
if pointer as usize & (cache_line_size - 1) != 0 {
|
||||||
panic!("not aligned nicely");
|
panic!("not aligned nicely");
|
||||||
}
|
}
|
||||||
// do a large sample of accesses to a cached line
|
// do a large sample of accesses to a cached line
|
||||||
@ -165,9 +182,11 @@ pub fn calibrate_flush(array: &[u8], cache_line_size: usize) -> u64 {
|
|||||||
let mut hit_histogram = vec![0; CFLUSH_BUCKET_NUMBER];
|
let mut hit_histogram = vec![0; CFLUSH_BUCKET_NUMBER];
|
||||||
|
|
||||||
let mut miss_histogram = hit_histogram.clone();
|
let mut miss_histogram = hit_histogram.clone();
|
||||||
|
if verbose_level >= Thresholds {
|
||||||
println!("Calibration for {:p}", unsafe { pointer.offset(i) });
|
println!("Calibration for {:p}", unsafe { pointer.offset(i) });
|
||||||
|
}
|
||||||
unsafe { load_and_flush(pointer.offset(i)) }; // align down on 64 bytes
|
unsafe { load_and_flush(pointer.offset(i)) }; // align down on 64 bytes
|
||||||
for _ in 1..(1 << 11) {
|
for _ in 1..CFLUSH_NUM_ITER {
|
||||||
let d = unsafe { load_and_flush(pointer.offset(i)) } as usize;
|
let d = unsafe { load_and_flush(pointer.offset(i)) } as usize;
|
||||||
hit_histogram[min(CFLUSH_BUCKET_NUMBER - 1, d / CFLUSH_BUCKET_SIZE) as usize] += 1;
|
hit_histogram[min(CFLUSH_BUCKET_NUMBER - 1, d / CFLUSH_BUCKET_SIZE) as usize] += 1;
|
||||||
}
|
}
|
||||||
@ -176,7 +195,7 @@ pub fn calibrate_flush(array: &[u8], cache_line_size: usize) -> u64 {
|
|||||||
unsafe { flush(pointer.offset(i)) };
|
unsafe { flush(pointer.offset(i)) };
|
||||||
|
|
||||||
unsafe { load_and_flush(pointer.offset(i)) };
|
unsafe { load_and_flush(pointer.offset(i)) };
|
||||||
for _ in 0..(1 << 10) {
|
for _ in 0..CFLUSH_NUM_ITER {
|
||||||
let d = unsafe { flush_and_flush(pointer.offset(i)) } as usize;
|
let d = unsafe { flush_and_flush(pointer.offset(i)) } as usize;
|
||||||
miss_histogram[min(CFLUSH_BUCKET_NUMBER - 1, d / CFLUSH_BUCKET_SIZE) as usize] += 1;
|
miss_histogram[min(CFLUSH_BUCKET_NUMBER - 1, d / CFLUSH_BUCKET_SIZE) as usize] += 1;
|
||||||
}
|
}
|
||||||
@ -184,35 +203,103 @@ pub fn calibrate_flush(array: &[u8], cache_line_size: usize) -> u64 {
|
|||||||
// extract min, max, & median of the distribution.
|
// extract min, max, & median of the distribution.
|
||||||
// set the threshold to mid point between miss max & hit min.
|
// set the threshold to mid point between miss max & hit min.
|
||||||
|
|
||||||
let mut hit_max: (usize, u32) = (0, 0);
|
// determine :
|
||||||
let mut miss_max: (usize, u32) = (0, 0);
|
// Hit min, max, median
|
||||||
|
// Miss min, miss max, median
|
||||||
|
// If there is no overlap the threshold is trivial
|
||||||
|
// If there is an overlap, grab the point where the ratio is balanced
|
||||||
|
|
||||||
for i in 0..hit_histogram.len() {
|
let mut hit_min = 0;
|
||||||
|
let mut hit_max = 0;
|
||||||
|
let mut miss_min = 0;
|
||||||
|
let mut miss_max = 0;
|
||||||
|
let mut miss_med = 0;
|
||||||
|
let mut hit_med = 0;
|
||||||
|
let mut hit_sum = 0;
|
||||||
|
let mut miss_sum = 0;
|
||||||
|
|
||||||
|
//let mut hit_max: (usize, u32) = (0, 0);
|
||||||
|
//let mut miss_max: (usize, u32) = (0, 0);
|
||||||
|
|
||||||
|
for i in 0..(hit_histogram.len() - 1) {
|
||||||
|
// ignore the last bucket, spurious context switches
|
||||||
|
if verbose_level >= RawResult {
|
||||||
println!(
|
println!(
|
||||||
"{:3}: {:10} {:10}",
|
"{:3}: {:10} {:10}",
|
||||||
i * CFLUSH_BUCKET_SIZE,
|
i * CFLUSH_BUCKET_SIZE,
|
||||||
hit_histogram[i],
|
hit_histogram[i],
|
||||||
miss_histogram[i]
|
miss_histogram[i]
|
||||||
);
|
);
|
||||||
if hit_max.1 < hit_histogram[i] {
|
|
||||||
hit_max = (i, hit_histogram[i]);
|
|
||||||
}
|
}
|
||||||
if miss_max.1 < miss_histogram[i] {
|
// FIXME
|
||||||
miss_max = (i, miss_histogram[i]);
|
// Code duplication for histogram analysis is meh.
|
||||||
|
// Is there a better way ?
|
||||||
|
|
||||||
|
for (min, max, med, sum, hist) in &mut [
|
||||||
|
(
|
||||||
|
&mut hit_min,
|
||||||
|
&mut hit_max,
|
||||||
|
&mut hit_med,
|
||||||
|
&mut hit_sum,
|
||||||
|
&hit_histogram,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
&mut miss_min,
|
||||||
|
&mut miss_max,
|
||||||
|
&mut miss_med,
|
||||||
|
&mut miss_sum,
|
||||||
|
&miss_histogram,
|
||||||
|
),
|
||||||
|
] {
|
||||||
|
if **min == 0 {
|
||||||
|
// looking for min
|
||||||
|
if hist[i] > CFLUSH_SPURIOUS_THRESHOLD {
|
||||||
|
**min = i;
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
println!("Miss max {}", miss_max.0 * CFLUSH_BUCKET_SIZE);
|
// min found, looking for max
|
||||||
println!("Max hit {}", hit_max.0 * CFLUSH_BUCKET_SIZE);
|
if hist[i] > CFLUSH_SPURIOUS_THRESHOLD {
|
||||||
let mut threshold: (usize, u32) = (0, u32::max_value());
|
**max = i;
|
||||||
for i in miss_max.0..hit_max.0 {
|
|
||||||
if hit_histogram[i] + miss_histogram[i] < threshold.1 {
|
|
||||||
threshold = (i, hit_histogram[i] + miss_histogram[i]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if **med == 0 {
|
||||||
|
**sum += hist[i];
|
||||||
|
if **sum >= CFLUSH_NUM_ITER / 2 {
|
||||||
|
**med = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if verbose_level >= Debug {
|
||||||
|
println!("sum hit {} miss {}", hit_sum, miss_sum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if verbose_level >= Thresholds {
|
||||||
|
println!("Hits: min {} max {} med {}", hit_min, hit_max, hit_med);
|
||||||
|
println!("Miss: min {} max {} med {}", miss_min, miss_max, miss_med);
|
||||||
|
}
|
||||||
|
//println!("Miss max {}", miss_max.0 * CFLUSH_BUCKET_SIZE);
|
||||||
|
//println!("Max hit {}", hit_max.0 * CFLUSH_BUCKET_SIZE);
|
||||||
|
let mut threshold: (usize, u32) = (0, u32::max_value());
|
||||||
|
/*for i in miss_max.0..hit_max.0 {
|
||||||
|
if hit_histogram[i] + miss_histogram[i] < threshold.1 {
|
||||||
|
threshold = (i, hit_histogram[i] + miss_histogram[i]);
|
||||||
|
}
|
||||||
|
}*/
|
||||||
|
|
||||||
println!("Threshold {}", threshold.0 * CFLUSH_BUCKET_SIZE);
|
println!("Threshold {}", threshold.0 * CFLUSH_BUCKET_SIZE);
|
||||||
println!("Calibration done.");
|
println!("Calibration done.");
|
||||||
|
|
||||||
|
ret.push((
|
||||||
|
i as usize,
|
||||||
|
hit_histogram
|
||||||
|
.iter()
|
||||||
|
.zip(&miss_histogram)
|
||||||
|
.map(|(&x, &y)| (x, y))
|
||||||
|
.collect(),
|
||||||
|
threshold.0,
|
||||||
|
));
|
||||||
}
|
}
|
||||||
//(threshold.0 * CFLUSH_BUCKET_SIZE) as u64
|
ret
|
||||||
0
|
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,60 @@
|
|||||||
// TODO create a nice program that can run on a system and will do the calibration.
|
// TODO create a nice program that can run on a system and will do the calibration.
|
||||||
// Make multithreaded, with core pinning or single threaded pinned to cores from the shell ?
|
// Calibration has to be sequential
|
||||||
|
// Will pin on each core one after the other
|
||||||
|
|
||||||
|
//fn execute_on_core(FnOnce)
|
||||||
|
|
||||||
|
#![feature(vec_resize_default)]
|
||||||
|
|
||||||
|
use cache_utils::calibration::calibrate_flush;
|
||||||
|
use cache_utils::calibration::Verbosity;
|
||||||
|
|
||||||
|
use nix::errno::Errno;
|
||||||
|
use nix::sched::{sched_getaffinity, sched_setaffinity, CpuSet};
|
||||||
|
use nix::unistd::Pid;
|
||||||
|
use nix::Error::Sys;
|
||||||
|
|
||||||
|
#[repr(align(4096))]
|
||||||
|
struct Page {
|
||||||
|
pub mem: [u8; 4096],
|
||||||
|
}
|
||||||
|
|
||||||
pub fn main() {
|
pub fn main() {
|
||||||
println!("Hello World!");
|
println!("Hello World!");
|
||||||
|
|
||||||
|
let p = Box::new(Page { mem: [0; 4096] });
|
||||||
|
|
||||||
|
let m: &[u8] = &p.mem;
|
||||||
|
|
||||||
|
eprintln!("Count: {}", CpuSet::count());
|
||||||
|
|
||||||
|
let old = sched_getaffinity(Pid::from_raw(0)).unwrap();
|
||||||
|
|
||||||
|
eprintln!("old: {:?}", old);
|
||||||
|
|
||||||
|
for i in 0..(CpuSet::count() - 1) {
|
||||||
|
if old.is_set(i).unwrap() {
|
||||||
|
println!("Iteration {}...", i);
|
||||||
|
let mut core = CpuSet::new();
|
||||||
|
core.set(i).unwrap();
|
||||||
|
|
||||||
|
match sched_setaffinity(Pid::from_raw(0), &core) {
|
||||||
|
Ok(()) => {
|
||||||
|
calibrate_flush(m, 64, Verbosity::Thresholds);
|
||||||
|
sched_setaffinity(Pid::from_raw(0), &old).unwrap();
|
||||||
|
println!("Iteration {}...ok ", i);
|
||||||
|
}
|
||||||
|
Err(Sys(Errno::EINVAL)) => {
|
||||||
|
println!("skipping");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
panic!("Unexpected error while setting affinity");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Let's grab the list of all CPUs
|
||||||
|
// Then iterate the calibration on each CPU core.
|
||||||
}
|
}
|
||||||
|
@ -29,8 +29,8 @@ pub fn enable_prefetchers(status: bool) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn prefetcher_fun(
|
pub fn prefetcher_fun(
|
||||||
victim4k_addr: *mut u8,
|
victim_4k_addr: *mut u8,
|
||||||
victim2M_addr: *mut u8,
|
victim_2M_addr: *mut u8,
|
||||||
threshold_ff: u64,
|
threshold_ff: u64,
|
||||||
) -> Vec<i32> {
|
) -> Vec<i32> {
|
||||||
let mut results = vec![0; 4096 / 64];
|
let mut results = vec![0; 4096 / 64];
|
||||||
@ -38,7 +38,7 @@ pub fn prefetcher_fun(
|
|||||||
for _ in 0..N {
|
for _ in 0..N {
|
||||||
//unsafe { maccess(victim4kaddr) };
|
//unsafe { maccess(victim4kaddr) };
|
||||||
for j in (0..4096).step_by(64).rev() {
|
for j in (0..4096).step_by(64).rev() {
|
||||||
let t = unsafe { only_flush(victim4k_addr.offset(j)) };
|
let t = unsafe { only_flush(victim_4k_addr.offset(j)) };
|
||||||
if threshold_ff < t {
|
if threshold_ff < t {
|
||||||
// hit
|
// hit
|
||||||
results[(j / 64) as usize] += 1;
|
results[(j / 64) as usize] += 1;
|
||||||
|
15
src/main.rs
15
src/main.rs
@ -35,6 +35,7 @@ use x86_64::PhysAddr;
|
|||||||
use x86_64::VirtAddr;
|
use x86_64::VirtAddr;
|
||||||
|
|
||||||
use arrayref;
|
use arrayref;
|
||||||
|
use cache_utils::calibration::Verbosity;
|
||||||
|
|
||||||
// Custom panic handler, required for freestanding program
|
// Custom panic handler, required for freestanding program
|
||||||
#[cfg(not(test))]
|
#[cfg(not(test))]
|
||||||
@ -214,6 +215,8 @@ fn kernel_main(boot_info: &'static BootInfo) -> ! {
|
|||||||
|
|
||||||
let cache_line_size = cache_line_size.unwrap_or(64) as usize;
|
let cache_line_size = cache_line_size.unwrap_or(64) as usize;
|
||||||
|
|
||||||
|
serial_println!("cache line size: {}", cache_line_size);
|
||||||
|
|
||||||
println!(
|
println!(
|
||||||
"prefetcher status: {}",
|
"prefetcher status: {}",
|
||||||
cache_utils::prefetcher::prefetcher_status()
|
cache_utils::prefetcher::prefetcher_status()
|
||||||
@ -226,7 +229,7 @@ fn kernel_main(boot_info: &'static BootInfo) -> ! {
|
|||||||
4096
|
4096
|
||||||
]
|
]
|
||||||
});
|
});
|
||||||
let threshold_flush_p = cache_utils::calibration::calibrate_flush(
|
let flush_result_p = cache_utils::calibration::calibrate_flush(
|
||||||
unsafe {
|
unsafe {
|
||||||
arrayref::array_ref![
|
arrayref::array_ref![
|
||||||
core::slice::from_raw_parts(victim4k_start as *mut u8, 4096),
|
core::slice::from_raw_parts(victim4k_start as *mut u8, 4096),
|
||||||
@ -235,6 +238,7 @@ fn kernel_main(boot_info: &'static BootInfo) -> ! {
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
cache_line_size,
|
cache_line_size,
|
||||||
|
Verbosity::RawResult,
|
||||||
);
|
);
|
||||||
cache_utils::prefetcher::enable_prefetchers(false);
|
cache_utils::prefetcher::enable_prefetchers(false);
|
||||||
serial_println!("Prefetcher disabled");
|
serial_println!("Prefetcher disabled");
|
||||||
@ -245,7 +249,7 @@ fn kernel_main(boot_info: &'static BootInfo) -> ! {
|
|||||||
4096
|
4096
|
||||||
]
|
]
|
||||||
});
|
});
|
||||||
let threshold_flush = cache_utils::calibration::calibrate_flush(
|
let flush_resut = cache_utils::calibration::calibrate_flush(
|
||||||
unsafe {
|
unsafe {
|
||||||
arrayref::array_ref![
|
arrayref::array_ref![
|
||||||
core::slice::from_raw_parts(victim4k_start as *mut u8, 4096),
|
core::slice::from_raw_parts(victim4k_start as *mut u8, 4096),
|
||||||
@ -254,15 +258,16 @@ fn kernel_main(boot_info: &'static BootInfo) -> ! {
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
cache_line_size,
|
cache_line_size,
|
||||||
|
Verbosity::RawResult,
|
||||||
);
|
);
|
||||||
serial_println!("Please compare histograms for sanity");
|
serial_println!("Please compare histograms for sanity");
|
||||||
|
|
||||||
if distance(threshold_access_p, threshold_access) > 10
|
if distance(threshold_access_p, threshold_access) > 10 {
|
||||||
|| distance(threshold_flush_p, threshold_flush) > 2
|
|
||||||
{
|
|
||||||
panic!("Inconsistent thresholds");
|
panic!("Inconsistent thresholds");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let threshold_flush = 0; // FIXME
|
||||||
|
|
||||||
serial_println!("0");
|
serial_println!("0");
|
||||||
let r_no_prefetch = prefetcher_fun(
|
let r_no_prefetch = prefetcher_fun(
|
||||||
victim4k_start as *mut u8,
|
victim4k_start as *mut u8,
|
||||||
|
Loading…
Reference in New Issue
Block a user