Various enhancements to cache calibration
This commit is contained in:
parent
3057f11512
commit
a176033851
@ -6,7 +6,6 @@ extern crate alloc;
|
|||||||
|
|
||||||
use alloc::vec::Vec;
|
use alloc::vec::Vec;
|
||||||
use core::arch::x86_64 as arch_x86;
|
use core::arch::x86_64 as arch_x86;
|
||||||
|
|
||||||
const CACHE_INFO_CPUID_LEAF: u32 = 0x4;
|
const CACHE_INFO_CPUID_LEAF: u32 = 0x4;
|
||||||
|
|
||||||
pub fn get_cache_info() -> Vec<CacheInfo> {
|
pub fn get_cache_info() -> Vec<CacheInfo> {
|
||||||
@ -33,19 +32,19 @@ pub enum CacheType {
|
|||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
pub struct CacheInfo {
|
pub struct CacheInfo {
|
||||||
cache_type: CacheType,
|
pub cache_type: CacheType,
|
||||||
level: u8,
|
pub level: u8,
|
||||||
self_init: bool,
|
pub self_init: bool,
|
||||||
fully_assoc: bool,
|
pub fully_assoc: bool,
|
||||||
core_for_cache: u16,
|
pub core_for_cache: u16,
|
||||||
core_in_package: u16,
|
pub core_in_package: u16,
|
||||||
cache_line_size: u16,
|
pub cache_line_size: u16,
|
||||||
physical_line_partition: u16,
|
pub physical_line_partition: u16,
|
||||||
associativity: u16,
|
pub associativity: u16,
|
||||||
sets: u32,
|
pub sets: u32,
|
||||||
wbinvd_no_guarantee: bool,
|
pub wbinvd_no_guarantee: bool,
|
||||||
inclusive: bool,
|
pub inclusive: bool,
|
||||||
complex_cache_indexing: bool,
|
pub complex_cache_indexing: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CacheInfo {
|
impl CacheInfo {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use crate::{flush, maccess, rdtsc_fence};
|
use crate::{flush, maccess, rdtsc_fence};
|
||||||
use polling_serial::serial_println;
|
use polling_serial::serial_println as println;
|
||||||
|
|
||||||
extern crate alloc;
|
extern crate alloc;
|
||||||
use alloc::vec;
|
use alloc::vec;
|
||||||
@ -45,8 +45,10 @@ pub unsafe fn only_flush(p: *const u8) -> u64 {
|
|||||||
const BUCKET_SIZE: usize = 5;
|
const BUCKET_SIZE: usize = 5;
|
||||||
const BUCKET_NUMBER: usize = 250;
|
const BUCKET_NUMBER: usize = 250;
|
||||||
|
|
||||||
|
// TODO same as below, also add the whole page calibration
|
||||||
|
|
||||||
pub fn calibrate_access(array: &[u8; 4096]) -> u64 {
|
pub fn calibrate_access(array: &[u8; 4096]) -> u64 {
|
||||||
serial_println!("Calibrating...");
|
println!("Calibrating...");
|
||||||
|
|
||||||
// Allocate a target array
|
// Allocate a target array
|
||||||
// TBD why size, why the position in the array, why the type (usize)
|
// TBD why size, why the position in the array, why the type (usize)
|
||||||
@ -66,7 +68,7 @@ pub fn calibrate_access(array: &[u8; 4096]) -> u64 {
|
|||||||
// the address in memory we are going to target
|
// the address in memory we are going to target
|
||||||
let pointer = &array[0] as *const u8;
|
let pointer = &array[0] as *const u8;
|
||||||
|
|
||||||
serial_println!("buffer start {:p}", pointer);
|
println!("buffer start {:p}", pointer);
|
||||||
|
|
||||||
if pointer as usize & 0x3f != 0 {
|
if pointer as usize & 0x3f != 0 {
|
||||||
panic!("not aligned nicely");
|
panic!("not aligned nicely");
|
||||||
@ -94,7 +96,7 @@ pub fn calibrate_access(array: &[u8; 4096]) -> u64 {
|
|||||||
let mut hit_max_i = 0;
|
let mut hit_max_i = 0;
|
||||||
let mut miss_min_i = 0;
|
let mut miss_min_i = 0;
|
||||||
for i in 0..hit_histogram.len() {
|
for i in 0..hit_histogram.len() {
|
||||||
serial_println!(
|
println!(
|
||||||
"{:3}: {:10} {:10}",
|
"{:3}: {:10} {:10}",
|
||||||
i * BUCKET_SIZE,
|
i * BUCKET_SIZE,
|
||||||
hit_histogram[i],
|
hit_histogram[i],
|
||||||
@ -108,8 +110,8 @@ pub fn calibrate_access(array: &[u8; 4096]) -> u64 {
|
|||||||
miss_min_i = i
|
miss_min_i = i
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
serial_println!("Miss min {}", miss_min_i * BUCKET_SIZE);
|
println!("Miss min {}", miss_min_i * BUCKET_SIZE);
|
||||||
serial_println!("Max hit {}", hit_max_i * BUCKET_SIZE);
|
println!("Max hit {}", hit_max_i * BUCKET_SIZE);
|
||||||
|
|
||||||
let mut min = u32::max_value();
|
let mut min = u32::max_value();
|
||||||
let mut min_i = 0;
|
let mut min_i = 0;
|
||||||
@ -120,16 +122,24 @@ pub fn calibrate_access(array: &[u8; 4096]) -> u64 {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
serial_println!("Threshold {}", min_i * BUCKET_SIZE);
|
println!("Threshold {}", min_i * BUCKET_SIZE);
|
||||||
serial_println!("Calibration done.");
|
println!("Calibration done.");
|
||||||
(min_i * BUCKET_SIZE) as u64
|
(min_i * BUCKET_SIZE) as u64
|
||||||
}
|
}
|
||||||
|
|
||||||
const CFLUSH_BUCKET_SIZE: usize = 1;
|
const CFLUSH_BUCKET_SIZE: usize = 1;
|
||||||
const CFLUSH_BUCKET_NUMBER: usize = 250;
|
const CFLUSH_BUCKET_NUMBER: usize = 250;
|
||||||
|
|
||||||
pub fn calibrate_flush(array: &[u8; 4096]) -> u64 {
|
/* TODO Code cleanup :
|
||||||
serial_println!("Calibrating cflush...");
|
- change type back to a slice OK
|
||||||
|
- change return type to return thresholds per cache line ?
|
||||||
|
- change iteration to be per cache line OK
|
||||||
|
- take the cache line size as a parameter OK
|
||||||
|
- parametrize 4k vs 2M ? Or just use the slice length ? OK
|
||||||
|
*/
|
||||||
|
|
||||||
|
pub fn calibrate_flush(array: &[u8], cache_line_size: usize) -> u64 {
|
||||||
|
println!("Calibrating cflush...");
|
||||||
|
|
||||||
// Allocate a target array
|
// Allocate a target array
|
||||||
// TBD why size, why the position in the array, why the type (usize)
|
// TBD why size, why the position in the array, why the type (usize)
|
||||||
@ -149,13 +159,13 @@ pub fn calibrate_flush(array: &[u8; 4096]) -> u64 {
|
|||||||
panic!("not aligned nicely");
|
panic!("not aligned nicely");
|
||||||
}
|
}
|
||||||
// do a large sample of accesses to a cached line
|
// do a large sample of accesses to a cached line
|
||||||
for i in 0..(4 << 10) {
|
for i in (0..(array.len() as isize)).step_by(cache_line_size) {
|
||||||
let mut hit_histogram = vec![0; CFLUSH_BUCKET_NUMBER];
|
let mut hit_histogram = vec![0; CFLUSH_BUCKET_NUMBER];
|
||||||
|
|
||||||
let mut miss_histogram = hit_histogram.clone();
|
let mut miss_histogram = hit_histogram.clone();
|
||||||
serial_println!("Calibration for {:p}", unsafe { pointer.offset(i) });
|
println!("Calibration for {:p}", unsafe { pointer.offset(i) });
|
||||||
unsafe { load_and_flush(pointer.offset(i)) }; // align down on 64 bytes
|
unsafe { load_and_flush(pointer.offset(i)) }; // align down on 64 bytes
|
||||||
for _ in 1..(1 << 10) {
|
for _ in 1..(1 << 11) {
|
||||||
let d = unsafe { load_and_flush(pointer.offset(i)) } as usize;
|
let d = unsafe { load_and_flush(pointer.offset(i)) } as usize;
|
||||||
hit_histogram[min(CFLUSH_BUCKET_NUMBER - 1, d / CFLUSH_BUCKET_SIZE) as usize] += 1;
|
hit_histogram[min(CFLUSH_BUCKET_NUMBER - 1, d / CFLUSH_BUCKET_SIZE) as usize] += 1;
|
||||||
}
|
}
|
||||||
@ -169,11 +179,14 @@ pub fn calibrate_flush(array: &[u8; 4096]) -> u64 {
|
|||||||
miss_histogram[min(CFLUSH_BUCKET_NUMBER - 1, d / CFLUSH_BUCKET_SIZE) as usize] += 1;
|
miss_histogram[min(CFLUSH_BUCKET_NUMBER - 1, d / CFLUSH_BUCKET_SIZE) as usize] += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// extract min, max, & median of the distribution.
|
||||||
|
// set the threshold to mid point between miss max & hit min.
|
||||||
|
|
||||||
let mut hit_max: (usize, u32) = (0, 0);
|
let mut hit_max: (usize, u32) = (0, 0);
|
||||||
let mut miss_max: (usize, u32) = (0, 0);
|
let mut miss_max: (usize, u32) = (0, 0);
|
||||||
|
|
||||||
for i in 0..hit_histogram.len() {
|
for i in 0..hit_histogram.len() {
|
||||||
serial_println!(
|
println!(
|
||||||
"{:3}: {:10} {:10}",
|
"{:3}: {:10} {:10}",
|
||||||
i * CFLUSH_BUCKET_SIZE,
|
i * CFLUSH_BUCKET_SIZE,
|
||||||
hit_histogram[i],
|
hit_histogram[i],
|
||||||
@ -186,8 +199,8 @@ pub fn calibrate_flush(array: &[u8; 4096]) -> u64 {
|
|||||||
miss_max = (i, miss_histogram[i]);
|
miss_max = (i, miss_histogram[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
serial_println!("Miss max {}", miss_max.0 * CFLUSH_BUCKET_SIZE);
|
println!("Miss max {}", miss_max.0 * CFLUSH_BUCKET_SIZE);
|
||||||
serial_println!("Max hit {}", hit_max.0 * CFLUSH_BUCKET_SIZE);
|
println!("Max hit {}", hit_max.0 * CFLUSH_BUCKET_SIZE);
|
||||||
let mut threshold: (usize, u32) = (0, u32::max_value());
|
let mut threshold: (usize, u32) = (0, u32::max_value());
|
||||||
for i in miss_max.0..hit_max.0 {
|
for i in miss_max.0..hit_max.0 {
|
||||||
if hit_histogram[i] + miss_histogram[i] < threshold.1 {
|
if hit_histogram[i] + miss_histogram[i] < threshold.1 {
|
||||||
@ -195,8 +208,8 @@ pub fn calibrate_flush(array: &[u8; 4096]) -> u64 {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
serial_println!("Threshold {}", threshold.0 * CFLUSH_BUCKET_SIZE);
|
println!("Threshold {}", threshold.0 * CFLUSH_BUCKET_SIZE);
|
||||||
serial_println!("Calibration done.");
|
println!("Calibration done.");
|
||||||
}
|
}
|
||||||
//(threshold.0 * CFLUSH_BUCKET_SIZE) as u64
|
//(threshold.0 * CFLUSH_BUCKET_SIZE) as u64
|
||||||
0
|
0
|
||||||
|
2
cache_utils/src/main.rs
Normal file
2
cache_utils/src/main.rs
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
// TODO create a nice program that can run on a system and will do the calibration.
|
||||||
|
// Make multithreaded, with core pinning or single threaded pinned to cores from the shell ?
|
26
src/main.rs
26
src/main.rs
@ -201,6 +201,18 @@ fn kernel_main(boot_info: &'static BootInfo) -> ! {
|
|||||||
serial_println!("{:#?}", caches);
|
serial_println!("{:#?}", caches);
|
||||||
|
|
||||||
println!("Caches: {:?}", caches);
|
println!("Caches: {:?}", caches);
|
||||||
|
let mut cache_line_size: Option<u16> = None;
|
||||||
|
for cache in caches {
|
||||||
|
if let Some(cache_line_size) = cache_line_size {
|
||||||
|
if cache_line_size != cache.cache_line_size {
|
||||||
|
unimplemented!("Does not support multiple cache line for now");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cache_line_size = Some(cache.cache_line_size)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let cache_line_size = cache_line_size.unwrap_or(64) as usize;
|
||||||
|
|
||||||
println!(
|
println!(
|
||||||
"prefetcher status: {}",
|
"prefetcher status: {}",
|
||||||
@ -214,13 +226,16 @@ fn kernel_main(boot_info: &'static BootInfo) -> ! {
|
|||||||
4096
|
4096
|
||||||
]
|
]
|
||||||
});
|
});
|
||||||
let threshold_flush_p = cache_utils::calibration::calibrate_flush(unsafe {
|
let threshold_flush_p = cache_utils::calibration::calibrate_flush(
|
||||||
|
unsafe {
|
||||||
arrayref::array_ref![
|
arrayref::array_ref![
|
||||||
core::slice::from_raw_parts(victim4k_start as *mut u8, 4096),
|
core::slice::from_raw_parts(victim4k_start as *mut u8, 4096),
|
||||||
0,
|
0,
|
||||||
4096
|
4096
|
||||||
]
|
]
|
||||||
});
|
},
|
||||||
|
cache_line_size,
|
||||||
|
);
|
||||||
cache_utils::prefetcher::enable_prefetchers(false);
|
cache_utils::prefetcher::enable_prefetchers(false);
|
||||||
serial_println!("Prefetcher disabled");
|
serial_println!("Prefetcher disabled");
|
||||||
let threshold_access = cache_utils::calibration::calibrate_access(unsafe {
|
let threshold_access = cache_utils::calibration::calibrate_access(unsafe {
|
||||||
@ -230,13 +245,16 @@ fn kernel_main(boot_info: &'static BootInfo) -> ! {
|
|||||||
4096
|
4096
|
||||||
]
|
]
|
||||||
});
|
});
|
||||||
let threshold_flush = cache_utils::calibration::calibrate_flush(unsafe {
|
let threshold_flush = cache_utils::calibration::calibrate_flush(
|
||||||
|
unsafe {
|
||||||
arrayref::array_ref![
|
arrayref::array_ref![
|
||||||
core::slice::from_raw_parts(victim4k_start as *mut u8, 4096),
|
core::slice::from_raw_parts(victim4k_start as *mut u8, 4096),
|
||||||
0,
|
0,
|
||||||
4096
|
4096
|
||||||
]
|
]
|
||||||
});
|
},
|
||||||
|
cache_line_size,
|
||||||
|
);
|
||||||
serial_println!("Please compare histograms for sanity");
|
serial_println!("Please compare histograms for sanity");
|
||||||
|
|
||||||
if distance(threshold_access_p, threshold_access) > 10
|
if distance(threshold_access_p, threshold_access) > 10
|
||||||
|
Loading…
Reference in New Issue
Block a user