From 65f94dcb67bd7a48e11eaed83995bc78cd8561f9 Mon Sep 17 00:00:00 2001
From: guillaume didier <guillaume.didier@inria.fr>
Date: Wed, 1 Apr 2020 16:12:15 +0200
Subject: [PATCH] General refactor of the calibration implementation when
 adding l3 hit calibration

This moves most of the logic into a calibrate function that takes as a parameter a slice of operations to calibrate.
L3 hit is measured by a flush followed by prefetcht1, cpuid serialization, and a timed access.
---
 Cargo.lock                     |  16 ++
 cache_utils/Cargo.toml         |   4 +-
 cache_utils/src/calibration.rs | 326 ++++++++++++++++++++-------------
 3 files changed, 216 insertions(+), 130 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 1735881..25c5654 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -32,6 +32,7 @@ dependencies = [
 name = "cache_utils"
 version = "0.1.0"
 dependencies = [
+ "itertools 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "nix 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "polling_serial 0.1.0",
  "static_assertions 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -64,6 +65,19 @@ dependencies = [
  "x86_64 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
+[[package]]
+name = "either"
+version = "1.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "itertools"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
 [[package]]
 name = "lazy_static"
 version = "1.4.0"
@@ -173,6 +187,8 @@ dependencies = [
 "checksum bootloader 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "47a31d0af1b59774ea1470bf40b4bf9fed0bbead2f2d9d614c4c2e13382414dd"
 "checksum cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)" = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd"
 "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
+"checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
+"checksum itertools 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b"
 "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 "checksum libc 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)" = "dea0c0405123bba743ee3f91f49b1c7cfb684eef0da0a50110f758ccf24cdff0"
 "checksum linked_list_allocator 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5825aea823c659d0fdcdbe8c9b78baf56f3a10365d783db874f6d360df72626f"
diff --git a/cache_utils/Cargo.toml b/cache_utils/Cargo.toml
index 028e438..276dfd1 100644
--- a/cache_utils/Cargo.toml
+++ b/cache_utils/Cargo.toml
@@ -11,11 +11,13 @@ polling_serial = { path = "../polling_serial", optional = true }
 vga_buffer = { path = "../vga_buffer", optional = true }
 x86_64 = "0.9.2"
 static_assertions = "1.1.0"
+itertools = { version = "0.9.0", default-features = false }
 
 nix = { version = "0.17.0", optional = true }
 
+
 [features]
-std = ["nix"]
+std = ["nix", "itertools/use_std"]
 no_std = ["polling_serial", "vga_buffer"]
 
 default = ["std"]
diff --git a/cache_utils/src/calibration.rs b/cache_utils/src/calibration.rs
index f48283e..ee5eba5 100644
--- a/cache_utils/src/calibration.rs
+++ b/cache_utils/src/calibration.rs
@@ -1,5 +1,6 @@
 use crate::{flush, maccess, rdtsc_fence};
 
+use core::arch::x86_64 as arch_x86;
 #[cfg(feature = "no_std")]
 use polling_serial::serial_println as println;
 
@@ -16,10 +17,7 @@ use crate::calibration::Verbosity::{Debug, NoOutput, RawResult, Thresholds};
 use alloc::vec;
 use alloc::vec::Vec;
 use core::cmp::min;
-
-// calibration, todo
-// this will require getting a nice page to do some amusing stuff on it.
-// it will have to return some results later.
+use itertools::Itertools;
 
 pub unsafe fn only_reload(p: *const u8) -> u64 {
     let t = rdtsc_fence();
@@ -53,6 +51,16 @@ pub unsafe fn only_flush(p: *const u8) -> u64 {
     rdtsc_fence() - t
 }
 
+pub unsafe fn l3_and_reload(p: *const u8) -> u64 {
+    flush(p);
+    arch_x86::_mm_mfence();
+    arch_x86::_mm_prefetch(p as *const i8, arch_x86::_MM_HINT_T2);
+    arch_x86::__cpuid_count(0, 0);
+    let t = rdtsc_fence();
+    maccess(p);
+    rdtsc_fence() - t
+}
+
 const BUCKET_SIZE: usize = 5;
 const BUCKET_NUMBER: usize = 250;
 
@@ -141,7 +149,7 @@ pub fn calibrate_access(array: &[u8; 4096]) -> u64 {
 const CFLUSH_BUCKET_SIZE: usize = 1;
 const CFLUSH_BUCKET_NUMBER: usize = 500;
 
-const CFLUSH_NUM_ITER: usize = 1 << 11;
+const CFLUSH_NUM_ITER: u32 = 1 << 11;
 const CFLUSH_SPURIOUS_THRESHOLD: usize = 1;
 
 /* TODO Code cleanup :
@@ -156,149 +164,209 @@ pub fn calibrate_flush(
     array: &[u8],
     cache_line_size: usize,
     verbose_level: Verbosity,
-) -> Vec<(usize, Vec<(usize, usize)>, usize)> {
-    if verbose_level > NoOutput {
-        println!("Calibrating cflush...");
-    }
-    let mut ret = Vec::new();
-    // Allocate a target array
-    // TBD why size, why the position in the array, why the type (usize)
-    //let mut array = Vec::<usize>::with_capacity(5 << 10);
-    //array.resize(5 << 10, 1);
-
-    //let array = array.into_boxed_slice();
-
-    // Histograms bucket of 5 and max at 400 cycles
-    // Magic numbers to be justified
-    // 80 is a size of screen
-
-    // the address in memory we are going to target
+) -> Vec<CalibrateResult> {
     let pointer = (&array[0]) as *const u8;
 
     if pointer as usize & (cache_line_size - 1) != 0 {
         panic!("not aligned nicely");
     }
-    // do a large sample of accesses to a cached line
-    for i in (0..(array.len() as isize)).step_by(cache_line_size) {
-        let mut hit_histogram = vec![0; CFLUSH_BUCKET_NUMBER];
 
-        let mut miss_histogram = hit_histogram.clone();
-        if verbose_level >= Thresholds {
-            println!("Calibration for {:p}", unsafe { pointer.offset(i) });
-        }
-        unsafe { load_and_flush(pointer.offset(i)) }; // align down on 64 bytes
-        for _ in 1..CFLUSH_NUM_ITER {
-            let d = unsafe { load_and_flush(pointer.offset(i)) } as usize;
-            hit_histogram[min(CFLUSH_BUCKET_NUMBER - 1, d / CFLUSH_BUCKET_SIZE) as usize] += 1;
+    calibrate_impl(
+        pointer,
+        cache_line_size,
+        array.len() as isize,
+        &[
+            (load_and_flush, "clflush hit"),
+            (flush_and_flush, "clflush miss"),
+        ],
+        CFLUSH_BUCKET_NUMBER,
+        CFLUSH_BUCKET_SIZE,
+        CFLUSH_NUM_ITER,
+        verbose_level,
+    )
+}
+
+#[derive(Debug)]
+pub struct CalibrateResult {
+    offset: isize,
+    histogram: Vec<Vec<u32>>,
+    median: Vec<u64>,
+    min: Vec<u64>,
+    max: Vec<u64>,
+}
+
+pub unsafe fn calibrate(
+    p: *const u8,
+    increment: usize,
+    len: isize,
+    operations: &[(unsafe fn(*const u8) -> u64, &str)],
+    buckets_num: usize,
+    bucket_size: usize,
+    num_iterations: u32,
+    verbosity_level: Verbosity,
+) -> Vec<CalibrateResult> {
+    calibrate_impl(
+        p,
+        increment,
+        len,
+        operations,
+        buckets_num,
+        bucket_size,
+        num_iterations,
+        verbosity_level,
+    )
+}
+
+const SPURIOUS_THRESHOLD: u32 = 1;
+fn calibrate_impl(
+    p: *const u8,
+    increment: usize,
+    len: isize,
+    operations: &[(unsafe fn(*const u8) -> u64, &str)],
+    buckets_num: usize,
+    bucket_size: usize,
+    num_iterations: u32,
+    verbosity_level: Verbosity,
+) -> Vec<CalibrateResult> {
+    if verbosity_level >= Thresholds {
+        println!(
+            "Calibrating {}...",
+            operations.iter().map(|(_, name)| { name }).format(", ")
+        );
+    }
+
+    let to_bucket = |time: u64| -> usize { time as usize / bucket_size };
+    let from_bucket = |bucket: usize| -> u64 { (bucket * bucket_size) as u64 };
+    let mut ret = Vec::new();
+    if verbosity_level >= Thresholds {
+        println!(
+            "CSV: address, {} min, {} median, {} max",
+            operations.iter().map(|(_, name)| name).format(" min, "),
+            operations.iter().map(|(_, name)| name).format(" median, "),
+            operations.iter().map(|(_, name)| name).format(" max, ")
+        );
+    }
+    for i in (0..len).step_by(increment) {
+        let pointer = unsafe { p.offset(i) };
+
+        if verbosity_level >= Thresholds {
+            println!("Calibration for {:p}", pointer);
         }
 
-        // do a large numer of accesses to uncached line
-        unsafe { flush(pointer.offset(i)) };
+        // TODO add some useful impl to CalibrateResults
+        let mut calibrate_result = CalibrateResult {
+            offset: i,
+            histogram: Vec::new(),
+            median: vec![0; operations.len()],
+            min: vec![0; operations.len()],
+            max: vec![0; operations.len()],
+        };
+        calibrate_result.histogram.reserve(operations.len());
 
-        unsafe { load_and_flush(pointer.offset(i)) };
-        for _ in 0..CFLUSH_NUM_ITER {
-            let d = unsafe { flush_and_flush(pointer.offset(i)) } as usize;
-            miss_histogram[min(CFLUSH_BUCKET_NUMBER - 1, d / CFLUSH_BUCKET_SIZE) as usize] += 1;
+        for op in operations {
+            let mut hist = vec![0; buckets_num];
+            for _ in 0..num_iterations {
+                let time = unsafe { op.0(pointer) };
+                let bucket = min(buckets_num - 1, to_bucket(time));
+                hist[bucket] += 1;
+            }
+            calibrate_result.histogram.push(hist);
         }
 
-        // extract min, max, & median of the distribution.
-        // set the threshold to mid point between miss max & hit min.
+        let mut sums = vec![0; operations.len()];
 
-        // determine :
-        // Hit min, max, median
-        // Miss min, miss max, median
-        // If there is no overlap the threshold is trivial
-        // If there is Grab the point where the ratio is balanced
+        let median_thresholds: Vec<u32> = calibrate_result
+            .histogram
+            .iter()
+            .map(|h| (num_iterations - h[buckets_num - 1]) / 2)
+            .collect();
 
-        let mut hit_min = 0;
-        let mut hit_max = 0;
-        let mut miss_min = 0;
-        let mut miss_max = 0;
-        let mut miss_med = 0;
-        let mut hit_med = 0;
-        let mut hit_sum = 0;
-        let mut miss_sum = 0;
+        if verbosity_level >= RawResult {
+            println!(
+                "time {}",
+                operations.iter().map(|(_, name)| name).format(" ")
+            );
+        }
 
-        //let mut hit_max: (usize, u32) = (0, 0);
-        //let mut miss_max: (usize, u32) = (0, 0);
+        for j in 0..buckets_num - 1 {
+            if verbosity_level >= RawResult {
+                print!("{:3}:", from_bucket(j));
+            }
+            // ignore the last bucket : spurious context switches etc.
+            for op in 0..operations.len() {
+                let hist = &calibrate_result.histogram[op][j];
+                let min = &mut calibrate_result.min[op];
+                let max = &mut calibrate_result.max[op];
+                let med = &mut calibrate_result.median[op];
+                let sum = &mut sums[op];
+                if verbosity_level >= RawResult {
+                    print!("{:10}", hist);
+                }
 
-        for i in 0..(hit_histogram.len() - 1) {
-            // ignore the last bucket, spurious context switches
-            if verbose_level >= RawResult {
+                if *min == 0 {
+                    // looking for min
+                    if *hist > SPURIOUS_THRESHOLD {
+                        *min = from_bucket(j);
+                    }
+                } else if *hist > SPURIOUS_THRESHOLD {
+                    *max = from_bucket(j);
+                }
+
+                if *med == 0 {
+                    *sum += *hist;
+                    if *sum >= median_thresholds[op] {
+                        *med = from_bucket(j);
+                    }
+                }
+            }
+            if verbosity_level >= RawResult {
+                println!();
+            }
+        }
+        if verbosity_level >= Thresholds {
+            for (j, (_, op)) in operations.iter().enumerate() {
                 println!(
-                    "{:3}: {:10} {:10}",
-                    i * CFLUSH_BUCKET_SIZE,
-                    hit_histogram[i],
-                    miss_histogram[i]
+                    "{}: min {}, median {}, max {}",
+                    op,
+                    calibrate_result.min[j],
+                    calibrate_result.median[j],
+                    calibrate_result.max[j]
                 );
             }
-
-            for (min, max, med, sum, hist) in &mut [
-                (
-                    &mut hit_min,
-                    &mut hit_max,
-                    &mut hit_med,
-                    &mut hit_sum,
-                    &hit_histogram,
-                ),
-                (
-                    &mut miss_min,
-                    &mut miss_max,
-                    &mut miss_med,
-                    &mut miss_sum,
-                    &miss_histogram,
-                ),
-            ] {
-                if **min == 0 {
-                    // looking for min
-                    if hist[i] > CFLUSH_SPURIOUS_THRESHOLD {
-                        **min = i;
-                    }
-                } else {
-                    // min found, looking for max
-                    if hist[i] > CFLUSH_SPURIOUS_THRESHOLD {
-                        **max = i;
-                    }
-                }
-
-                if **med == 0 {
-                    **sum += hist[i];
-                    if **sum >= (CFLUSH_NUM_ITER - hist[hist.len() - 1]) / 2 {
-                        **med = i;
-                    }
-                }
-            }
-            if verbose_level >= Debug {
-                println!("sum hit {} miss {}", hit_sum, miss_sum);
-            }
+            println!(
+                "CSV: {:p}, {}, {}, {}",
+                pointer,
+                calibrate_result.min.iter().format(", "),
+                calibrate_result.median.iter().format(", "),
+                calibrate_result.max.iter().format(", ")
+            );
         }
-
-        if verbose_level >= Thresholds {
-            println!("Hits: min {} max {} med {}", hit_min, hit_max, hit_med);
-            println!("Miss: min {} max {} med {}", miss_min, miss_max, miss_med);
-        }
-        //println!("Miss max {}", miss_max.0 * CFLUSH_BUCKET_SIZE);
-        //println!("Max hit {}", hit_max.0 * CFLUSH_BUCKET_SIZE);
-        let mut threshold: (usize, u32) = (0, u32::max_value());
-        /*for i in miss_max.0..hit_max.0 {
-            if hit_histogram[i] + miss_histogram[i] < threshold.1 {
-                threshold = (i, hit_histogram[i] + miss_histogram[i]);
-            }
-        }*/
-        if verbose_level > NoOutput {
-            println!("Threshold {}", threshold.0 * CFLUSH_BUCKET_SIZE);
-            println!("Calibration done.");
-        }
-
-        ret.push((
-            i as usize,
-            hit_histogram
-                .iter()
-                .zip(&miss_histogram)
-                .map(|(&x, &y)| (x, y))
-                .collect(),
-            threshold.0,
-        ));
+        ret.push(calibrate_result);
     }
     ret
 }
+
+#[allow(non_snake_case)]
+pub fn calibrate_L3_miss_hit(
+    array: &[u8],
+    cache_line_size: usize,
+    verbose_level: Verbosity,
+) -> CalibrateResult {
+    if verbose_level > NoOutput {
+        println!("Calibrating L3 access...");
+    }
+    let pointer = (&array[0]) as *const u8;
+
+    let r = calibrate_impl(
+        pointer,
+        cache_line_size,
+        array.len() as isize,
+        &[(l3_and_reload, "L3 hit")],
+        512,
+        2,
+        1 << 11,
+        verbose_level,
+    );
+
+    r.into_iter().next().unwrap()
+}