From 65f94dcb67bd7a48e11eaed83995bc78cd8561f9 Mon Sep 17 00:00:00 2001 From: guillaume didier Date: Wed, 1 Apr 2020 16:12:15 +0200 Subject: [PATCH] General refactor of the calibration implementation when adding l3 hit calibration This moves most of the logic into a calibrate function taking as a parameter a slice of operations to calibrate. L3 hit is measured by flush followed by prefetcht2, cpuid serialization, timed access. --- Cargo.lock | 16 ++ cache_utils/Cargo.toml | 4 +- cache_utils/src/calibration.rs | 326 ++++++++++++++++++++------------- 3 files changed, 216 insertions(+), 130 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1735881..25c5654 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -32,6 +32,7 @@ dependencies = [ name = "cache_utils" version = "0.1.0" dependencies = [ + "itertools 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", "nix 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)", "polling_serial 0.1.0", "static_assertions 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -64,6 +65,19 @@ dependencies = [ "x86_64 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "either" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "itertools" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -173,6 +187,8 @@ dependencies = [ "checksum bootloader 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "47a31d0af1b59774ea1470bf40b4bf9fed0bbead2f2d9d614c4c2e13382414dd" "checksum cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)" = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd" "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = 
"4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" +"checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" +"checksum itertools 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" "checksum libc 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)" = "dea0c0405123bba743ee3f91f49b1c7cfb684eef0da0a50110f758ccf24cdff0" "checksum linked_list_allocator 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5825aea823c659d0fdcdbe8c9b78baf56f3a10365d783db874f6d360df72626f" diff --git a/cache_utils/Cargo.toml b/cache_utils/Cargo.toml index 028e438..276dfd1 100644 --- a/cache_utils/Cargo.toml +++ b/cache_utils/Cargo.toml @@ -11,11 +11,13 @@ polling_serial = { path = "../polling_serial", optional = true } vga_buffer = { path = "../vga_buffer", optional = true } x86_64 = "0.9.2" static_assertions = "1.1.0" +itertools = { version = "0.9.0", default-features = false } nix = { version = "0.17.0", optional = true } + [features] -std = ["nix"] +std = ["nix", "itertools/use_std"] no_std = ["polling_serial", "vga_buffer"] default = ["std"] diff --git a/cache_utils/src/calibration.rs b/cache_utils/src/calibration.rs index f48283e..ee5eba5 100644 --- a/cache_utils/src/calibration.rs +++ b/cache_utils/src/calibration.rs @@ -1,5 +1,6 @@ use crate::{flush, maccess, rdtsc_fence}; +use core::arch::x86_64 as arch_x86; #[cfg(feature = "no_std")] use polling_serial::serial_println as println; @@ -16,10 +17,7 @@ use crate::calibration::Verbosity::{Debug, NoOutput, RawResult, Thresholds}; use alloc::vec; use alloc::vec::Vec; use core::cmp::min; - -// calibration, todo -// this will require getting a nice page to do some amusing stuff on 
it. -// it will have to return some results later. +use itertools::Itertools; pub unsafe fn only_reload(p: *const u8) -> u64 { let t = rdtsc_fence(); @@ -53,6 +51,16 @@ pub unsafe fn only_flush(p: *const u8) -> u64 { rdtsc_fence() - t } +pub unsafe fn l3_and_reload(p: *const u8) -> u64 { + flush(p); + arch_x86::_mm_mfence(); + arch_x86::_mm_prefetch(p as *const i8, arch_x86::_MM_HINT_T2); + arch_x86::__cpuid_count(0, 0); + let t = rdtsc_fence(); + maccess(p); + rdtsc_fence() - t +} + const BUCKET_SIZE: usize = 5; const BUCKET_NUMBER: usize = 250; @@ -141,7 +149,7 @@ pub fn calibrate_access(array: &[u8; 4096]) -> u64 { const CFLUSH_BUCKET_SIZE: usize = 1; const CFLUSH_BUCKET_NUMBER: usize = 500; -const CFLUSH_NUM_ITER: usize = 1 << 11; +const CFLUSH_NUM_ITER: u32 = 1 << 11; const CFLUSH_SPURIOUS_THRESHOLD: usize = 1; /* TODO Code cleanup : @@ -156,149 +164,209 @@ pub fn calibrate_flush( array: &[u8], cache_line_size: usize, verbose_level: Verbosity, -) -> Vec<(usize, Vec<(usize, usize)>, usize)> { - if verbose_level > NoOutput { - println!("Calibrating cflush..."); - } - let mut ret = Vec::new(); - // Allocate a target array - // TBD why size, why the position in the array, why the type (usize) - //let mut array = Vec::<usize>::with_capacity(5 << 10); - //array.resize(5 << 10, 1); - - //let array = array.into_boxed_slice(); - - // Histograms bucket of 5 and max at 400 cycles - // Magic numbers to be justified - // 80 is a size of screen - - // the address in memory we are going to target +) -> Vec<CalibrateResult> { let pointer = (&array[0]) as *const u8; if pointer as usize & (cache_line_size - 1) != 0 { panic!("not aligned nicely"); } - // do a large sample of accesses to a cached line - for i in (0..(array.len() as isize)).step_by(cache_line_size) { - let mut hit_histogram = vec![0; CFLUSH_BUCKET_NUMBER]; - let mut miss_histogram = hit_histogram.clone(); - if verbose_level >= Thresholds { - println!("Calibration for {:p}", unsafe { pointer.offset(i) }); - } - unsafe { 
load_and_flush(pointer.offset(i)) }; // align down on 64 bytes - for _ in 1..CFLUSH_NUM_ITER { - let d = unsafe { load_and_flush(pointer.offset(i)) } as usize; - hit_histogram[min(CFLUSH_BUCKET_NUMBER - 1, d / CFLUSH_BUCKET_SIZE) as usize] += 1; + calibrate_impl( + pointer, + cache_line_size, + array.len() as isize, + &[ + (load_and_flush, "clflush hit"), + (flush_and_flush, "clflush miss"), + ], + CFLUSH_BUCKET_NUMBER, + CFLUSH_BUCKET_SIZE, + CFLUSH_NUM_ITER, + verbose_level, + ) +} + +#[derive(Debug)] +pub struct CalibrateResult { + offset: isize, + histogram: Vec<Vec<u32>>, + median: Vec<u64>, + min: Vec<u64>, + max: Vec<u64>, +} + +pub unsafe fn calibrate( + p: *const u8, + increment: usize, + len: isize, + operations: &[(unsafe fn(*const u8) -> u64, &str)], + buckets_num: usize, + bucket_size: usize, + num_iterations: u32, + verbosity_level: Verbosity, +) -> Vec<CalibrateResult> { + calibrate_impl( + p, + increment, + len, + operations, + buckets_num, + bucket_size, + num_iterations, + verbosity_level, + ) +} + +const SPURIOUS_THRESHOLD: u32 = 1; +fn calibrate_impl( + p: *const u8, + increment: usize, + len: isize, + operations: &[(unsafe fn(*const u8) -> u64, &str)], + buckets_num: usize, + bucket_size: usize, + num_iterations: u32, + verbosity_level: Verbosity, +) -> Vec<CalibrateResult> { + if verbosity_level >= Thresholds { + println!( + "Calibrating {}...", + operations.iter().map(|(_, name)| { name }).format(", ") + ); + } + + let to_bucket = |time: u64| -> usize { time as usize / bucket_size }; + let from_bucket = |bucket: usize| -> u64 { (bucket * bucket_size) as u64 }; + let mut ret = Vec::new(); + if verbosity_level >= Thresholds { + println!( + "CSV: address, {} min, {} median, {} max", + operations.iter().map(|(_, name)| name).format(" min, "), + operations.iter().map(|(_, name)| name).format(" median, "), + operations.iter().map(|(_, name)| name).format(" max, ") + ); + } + for i in (0..len).step_by(increment) { + let pointer = unsafe { p.offset(i) }; + + if verbosity_level >= Thresholds { + 
println!("Calibration for {:p}", pointer); } - // do a large numer of accesses to uncached line - unsafe { flush(pointer.offset(i)) }; + // TODO add some useful impl to CalibrateResults + let mut calibrate_result = CalibrateResult { + offset: i, + histogram: Vec::new(), + median: vec![0; operations.len()], + min: vec![0; operations.len()], + max: vec![0; operations.len()], + }; + calibrate_result.histogram.reserve(operations.len()); - unsafe { load_and_flush(pointer.offset(i)) }; - for _ in 0..CFLUSH_NUM_ITER { - let d = unsafe { flush_and_flush(pointer.offset(i)) } as usize; - miss_histogram[min(CFLUSH_BUCKET_NUMBER - 1, d / CFLUSH_BUCKET_SIZE) as usize] += 1; + for op in operations { + let mut hist = vec![0; buckets_num]; + for _ in 0..num_iterations { + let time = unsafe { op.0(pointer) }; + let bucket = min(buckets_num - 1, to_bucket(time)); + hist[bucket] += 1; + } + calibrate_result.histogram.push(hist); } - // extract min, max, & median of the distribution. - // set the threshold to mid point between miss max & hit min. + let mut sums = vec![0; operations.len()]; - // determine : - // Hit min, max, median - // Miss min, miss max, median - // If there is no overlap the threshold is trivial - // If there is Grab the point where the ratio is balanced + let median_thresholds: Vec<u32> = calibrate_result + .histogram + .iter() + .map(|h| (num_iterations - h[buckets_num - 1]) / 2) + .collect(); - let mut hit_min = 0; - let mut hit_max = 0; - let mut miss_min = 0; - let mut miss_max = 0; - let mut miss_med = 0; - let mut hit_med = 0; - let mut hit_sum = 0; - let mut miss_sum = 0; + if verbosity_level >= RawResult { + println!( + "time {}", + operations.iter().map(|(_, name)| name).format(" ") + ); + } - //let mut hit_max: (usize, u32) = (0, 0); - //let mut miss_max: (usize, u32) = (0, 0); + for j in 0..buckets_num - 1 { + if verbosity_level >= RawResult { + print!("{:3}:", from_bucket(j)); + } + // ignore the last bucket : spurious context switches etc. 
+ for op in 0..operations.len() { + let hist = &calibrate_result.histogram[op][j]; + let min = &mut calibrate_result.min[op]; + let max = &mut calibrate_result.max[op]; + let med = &mut calibrate_result.median[op]; + let sum = &mut sums[op]; + if verbosity_level >= RawResult { + print!("{:10}", hist); + } - for i in 0..(hit_histogram.len() - 1) { - // ignore the last bucket, spurious context switches - if verbose_level >= RawResult { + if *min == 0 { + // looking for min + if *hist > SPURIOUS_THRESHOLD { + *min = from_bucket(j); + } + } else if *hist > SPURIOUS_THRESHOLD { + *max = from_bucket(j); + } + + if *med == 0 { + *sum += *hist; + if *sum >= median_thresholds[op] { + *med = from_bucket(j); + } + } + } + if verbosity_level >= RawResult { + println!(); + } + } + if verbosity_level >= Thresholds { + for (j, (_, op)) in operations.iter().enumerate() { println!( - "{:3}: {:10} {:10}", - i * CFLUSH_BUCKET_SIZE, - hit_histogram[i], - miss_histogram[i] + "{}: min {}, median {}, max {}", + op, + calibrate_result.min[j], + calibrate_result.median[j], + calibrate_result.max[j] ); } - - for (min, max, med, sum, hist) in &mut [ - ( - &mut hit_min, - &mut hit_max, - &mut hit_med, - &mut hit_sum, - &hit_histogram, - ), - ( - &mut miss_min, - &mut miss_max, - &mut miss_med, - &mut miss_sum, - &miss_histogram, - ), - ] { - if **min == 0 { - // looking for min - if hist[i] > CFLUSH_SPURIOUS_THRESHOLD { - **min = i; - } - } else { - // min found, looking for max - if hist[i] > CFLUSH_SPURIOUS_THRESHOLD { - **max = i; - } - } - - if **med == 0 { - **sum += hist[i]; - if **sum >= (CFLUSH_NUM_ITER - hist[hist.len() - 1]) / 2 { - **med = i; - } - } - } - if verbose_level >= Debug { - println!("sum hit {} miss {}", hit_sum, miss_sum); - } + println!( + "CSV: {:p}, {}, {}, {}", + pointer, + calibrate_result.min.iter().format(", "), + calibrate_result.median.iter().format(", "), + calibrate_result.max.iter().format(", ") + ); } - - if verbose_level >= Thresholds { - println!("Hits: 
min {} max {} med {}", hit_min, hit_max, hit_med); - println!("Miss: min {} max {} med {}", miss_min, miss_max, miss_med); - } - //println!("Miss max {}", miss_max.0 * CFLUSH_BUCKET_SIZE); - //println!("Max hit {}", hit_max.0 * CFLUSH_BUCKET_SIZE); - let mut threshold: (usize, u32) = (0, u32::max_value()); - /*for i in miss_max.0..hit_max.0 { - if hit_histogram[i] + miss_histogram[i] < threshold.1 { - threshold = (i, hit_histogram[i] + miss_histogram[i]); - } - }*/ - if verbose_level > NoOutput { - println!("Threshold {}", threshold.0 * CFLUSH_BUCKET_SIZE); - println!("Calibration done."); - } - - ret.push(( - i as usize, - hit_histogram - .iter() - .zip(&miss_histogram) - .map(|(&x, &y)| (x, y)) - .collect(), - threshold.0, - )); + ret.push(calibrate_result); } ret } + +#[allow(non_snake_case)] +pub fn calibrate_L3_miss_hit( + array: &[u8], + cache_line_size: usize, + verbose_level: Verbosity, +) -> CalibrateResult { + if verbose_level > NoOutput { + println!("Calibrating L3 access..."); + } + let pointer = (&array[0]) as *const u8; + + let r = calibrate_impl( + pointer, + cache_line_size, + array.len() as isize, + &[(l3_and_reload, "L3 hit")], + 512, + 2, + 1 << 11, + verbose_level, + ); + + r.into_iter().next().unwrap() +}