Refactor ip_tool into cache_utils, start work on cache level calibration

This commit is contained in:
Guillume DIDIER 2021-11-24 17:20:04 +01:00
parent 84eee25e5a
commit c734b5ce53
4 changed files with 141 additions and 29 deletions

View File

@ -19,6 +19,8 @@ nix = { version = "0.20.0", optional = true }
libc = { version = "0.2.92", optional = true }
hashbrown = { version = "0.11.2", optional = true }
turn_lock = { path = "../turn_lock", optional = true}
lazy_static = "1.4.0"
bitvec = "0.22.3"
[features]
use_std = ["nix", "itertools/use_std", "libc", "cpuid/use_std", "turn_lock"]

View File

@ -14,6 +14,7 @@ use nix::unistd::Pid;
use core::arch::x86_64 as arch_x86;
use cache_utils::ip_tool::Function;
use core::cmp::min;
use std::cmp::Ordering;
use std::collections::HashMap;
@ -53,6 +54,53 @@ use std::str::from_utf8;
- [ ] Make the plots
*/
/// Adapter with the `(context, address) -> u64` shape used for the `op` field
/// of the calibration operations: calls the callable stored in `f.fun` on
/// `addr` and returns its result unchanged.
///
/// # Safety
/// `addr` is forwarded unchecked to `f.fun`; the caller must satisfy whatever
/// that callable requires of the pointer.
unsafe fn function_call(f: &Function, addr: *const u8) -> u64 {
unsafe { (f.fun)(addr) }
}
/// Preparation step paired with the "Load from RAM" operation: flushes `p`
/// through `flush` so that the following timed load does not find it cached.
///
/// # Safety
/// `p` is passed unchecked to `flush`.
/// NOTE(review): presumably `p` must point into mapped memory — confirm against `flush`.
unsafe fn prepare_RAM(p: *const u8) {
unsafe { flush(p) };
}
/// Shared body of the prefetch-based preparation steps.
///
/// Accesses `p`, flushes it, then prefetches it again with the locality hint
/// `HINT` (one of the `_MM_HINT_*` constants). An `mfence` separates each step
/// and a `cpuid` is executed last.
///
/// # Safety
/// `p` is passed unchecked to `maccess`, `flush` and `_mm_prefetch`; the caller
/// must satisfy the requirements of those functions.
unsafe fn prepare_prefetched<const HINT: i32>(p: *const u8) {
    // SAFETY: the pointer requirements are delegated to the caller (see above).
    unsafe {
        maccess(p);
        arch_x86::_mm_mfence();
        flush(p);
        arch_x86::_mm_mfence();
        arch_x86::_mm_prefetch::<HINT>(p as *const i8);
        // The cpuid result is deliberately discarded.
        // NOTE(review): presumably executed only for its serializing effect — confirm.
        arch_x86::__cpuid_count(0, 0);
    }
}

/// Preparation step paired with "Load from L3 (prefetch)": flushes `p`, then
/// prefetches it with `_MM_HINT_T2`.
///
/// # Safety
/// Same contract as [`prepare_prefetched`].
unsafe fn prepare_pL3(p: *const u8) {
    // SAFETY: the caller's obligations are forwarded unchanged.
    unsafe { prepare_prefetched::<{ arch_x86::_MM_HINT_T2 }>(p) }
}

/// Preparation step paired with "Load from L2 (prefetch)": flushes `p`, then
/// prefetches it with `_MM_HINT_T1`.
///
/// # Safety
/// Same contract as [`prepare_prefetched`].
unsafe fn prepare_pL2(p: *const u8) {
    // SAFETY: the caller's obligations are forwarded unchanged.
    unsafe { prepare_prefetched::<{ arch_x86::_MM_HINT_T1 }>(p) }
}

/// Preparation step paired with "Load from L1 (prefetch)": flushes `p`, then
/// prefetches it with `_MM_HINT_T0`.
///
/// # Safety
/// Same contract as [`prepare_prefetched`].
unsafe fn prepare_pL1(p: *const u8) {
    // SAFETY: the caller's obligations are forwarded unchanged.
    unsafe { prepare_prefetched::<{ arch_x86::_MM_HINT_T0 }>(p) }
}
/// Preparation step paired with "Load from L1 (Reload)": performs one
/// `only_reload` of `p` and discards the value it returns.
///
/// # Safety
/// `p` is passed unchecked to `only_reload`.
unsafe fn prepare_L1(p: *const u8) {
unsafe { only_reload(p) };
}
/// Placeholder preparation step; not implemented yet.
/// NOTE(review): the name suggests an L2 state reached by a route other than
/// prefetch (eviction?) — confirm the intent before implementing.
///
/// # Panics
/// Always panics through `unimplemented!()`; `p` is never used.
unsafe fn prepare_eL2(p: *const u8) {
unimplemented!()
}
/// Placeholder preparation step; not implemented yet.
/// NOTE(review): the name suggests an L3 state reached by a route other than
/// prefetch (eviction?) — confirm the intent before implementing.
///
/// # Panics
/// Always panics through `unimplemented!()`; `p` is never used.
unsafe fn prepare_eL3(p: *const u8) {
unimplemented!()
}
unsafe fn multiple_access(p: *const u8) {
unsafe {
maccess::<u8>(p);
@ -105,26 +153,11 @@ struct Threshold {
pub num_false_miss: u32,
}
/// Adapts `only_flush` to the `(context, address) -> u64` operation signature.
/// The `&()` context argument is ignored.
///
/// # Safety
/// `addr` is forwarded unchecked to `only_flush`.
unsafe fn only_flush_wrap(_: &(), addr: *const u8) -> u64 {
unsafe { only_flush(addr) }
}
/// Adapts `only_reload` to the `(context, address) -> u64` operation signature.
/// The `&()` context argument is ignored.
///
/// # Safety
/// `addr` is forwarded unchecked to `only_reload`.
unsafe fn only_reload_wrap(_: &(), addr: *const u8) -> u64 {
unsafe { only_reload(addr) }
}
/// Adapts `load_and_flush` to the `(context, address) -> u64` operation
/// signature. The `&()` context argument is ignored.
///
/// # Safety
/// `addr` is forwarded unchecked to `load_and_flush`.
unsafe fn load_and_flush_wrap(_: &(), addr: *const u8) -> u64 {
unsafe { load_and_flush(addr) }
}
/// Adapts `flush_and_reload` to the `(context, address) -> u64` operation
/// signature. The `&()` context argument is ignored.
///
/// # Safety
/// `addr` is forwarded unchecked to `flush_and_reload`.
unsafe fn flush_and_reload_wrap(_: &(), addr: *const u8) -> u64 {
unsafe { flush_and_reload(addr) }
}
/// Adapts `reload_and_flush` to the `(context, address) -> u64` operation
/// signature. The `&()` context argument is ignored.
///
/// # Safety
/// `addr` is forwarded unchecked to `reload_and_flush`.
unsafe fn reload_and_flush_wrap(_: &(), addr: *const u8) -> u64 {
unsafe { reload_and_flush(addr) }
}
fn main() {
let measure_reload =
cache_utils::ip_tool::Function::try_new(1, 0, cache_utils::ip_tool::TIMED_MACCESS).unwrap();
let measure_nop =
cache_utils::ip_tool::Function::try_new(1, 0, cache_utils::ip_tool::TIMED_NOP).unwrap();
// Grab a slice of memory
let core_per_socket_out = Command::new("sh")
@ -172,13 +205,50 @@ fn main() {
panic!("not aligned nicely");
}
// Single calibration operation: the line is prepared with `maccess::<u8>`,
// then `only_flush_wrap` is the measured operation. No context is needed, so
// `t` is the unit value.
let operations = [CalibrateOperation2T {
prepare: maccess::<u8>,
op: only_flush_wrap,
name: "clflush_remote_hit",
display_name: "clflush remote hit",
t: &(),
}];
// Calibration operations, one per cache state under test. Every entry measures
// through `function_call`; `prepare` puts the line in the desired state and `t`
// selects which generated function is timed.
let operations = [
    // Line flushed beforehand.
    CalibrateOperation2T {
        prepare: prepare_RAM,
        op: function_call,
        name: "RAM_load",
        display_name: "Load from RAM",
        t: &measure_reload,
    },
    // Line flushed, then prefetched by `prepare_pL3`.
    CalibrateOperation2T {
        prepare: prepare_pL3,
        op: function_call,
        name: "pL3_load",
        display_name: "Load from L3 (prefetch)",
        t: &measure_reload,
    },
    // Line flushed, then prefetched by `prepare_pL2`.
    CalibrateOperation2T {
        prepare: prepare_pL2,
        op: function_call,
        name: "pL2_load",
        display_name: "Load from L2 (prefetch)",
        t: &measure_reload,
    },
    // Line flushed, then prefetched by `prepare_pL1`.
    CalibrateOperation2T {
        prepare: prepare_pL1,
        op: function_call,
        name: "pL1_load",
        display_name: "Load from L1 (prefetch)",
        t: &measure_reload,
    },
    // Line reloaded once beforehand.
    CalibrateOperation2T {
        prepare: prepare_L1,
        op: function_call,
        name: "L1_load",
        display_name: "Load from L1 (Reload)",
        t: &measure_reload,
    },
    // Timed nop with no preparation. This entry previously reused the
    // "pL3_load" name and label, which duplicated the L3 entry's identifier
    // and mislabelled the measurement.
    CalibrateOperation2T {
        prepare: noop::<u8>,
        op: function_call,
        name: "nop",
        display_name: "Timed nop",
        t: &measure_nop,
    },
];
let r = unsafe {
calibrate_fixed_freq_2_thread(

View File

@ -1,9 +1,10 @@
use crate::mmap::MMappedMemory;
use bitvec::prelude::*;
use cache_utils::mmap::MMappedMemory;
use lazy_static::lazy_static;
use std::collections::LinkedList;
use std::ptr::copy_nonoverlapping;
use std::sync::Mutex;
use std::vec::Vec;
struct WXRange {
start: usize,
@ -56,6 +57,12 @@ pub const TIMED_CLFLUSH: FunctionTemplate = FunctionTemplate {
end: timed_clflush_template_end as *const u8,
};
/// Template for the timed `nop` routine defined in the `global_asm!` block.
///
/// `start` is the routine's entry point (`timed_nop_template`), `ip` is the
/// address of the `timed_nop_template_ip` label and `end` is the address of
/// the `timed_nop_template_end` label.
pub const TIMED_NOP: FunctionTemplate = FunctionTemplate {
start: timed_nop_template,
ip: timed_nop_template_ip as *const u8,
end: timed_nop_template_end as *const u8,
};
impl WXRange {
unsafe fn allocate(
&mut self,
@ -274,6 +281,31 @@ global_asm!(
".global timed_clflush_template_end",
"timed_clflush_template_end:",
"nop",
".global timed_nop_template",
"timed_nop_template:",
"mfence",
"lfence",
"rdtsc",
"shl rdx, 32",
"mov rsi, rdx",
"add rsi, rax",
"mfence",
"lfence",
".global timed_nop_template_ip",
"timed_nop_template_ip:",
"nop",
"mfence",
"lfence",
"rdtsc",
"shl rdx, 32",
"add rax, rdx",
"mfence",
"lfence",
"sub rax, rsi",
"ret",
".global timed_nop_template_end",
"timed_nop_template_end:",
"nop",
);
extern "C" {
@ -283,6 +315,9 @@ extern "C" {
fn timed_clflush_template(pointer: *const u8) -> u64;
fn timed_clflush_template_ip();
fn timed_clflush_template_end();
fn timed_nop_template(pointer: *const u8) -> u64;
fn timed_nop_template_ip();
fn timed_nop_template_end();
}
pub fn tmp_test() {

View File

@ -1,8 +1,13 @@
#![cfg_attr(feature = "no_std", no_std)]
#![feature(ptr_internals)]
#![feature(linked_list_cursors)]
#![feature(global_asm)]
#![allow(clippy::missing_safety_doc)]
#![deny(unsafe_op_in_unsafe_fn)]
use core::arch::x86_64 as arch_x86;
use core::ptr;
use static_assertions::assert_cfg;
assert_cfg!(
@ -25,8 +30,8 @@ pub mod frequency;
#[cfg(feature = "use_std")]
mod calibrate_2t;
use core::arch::x86_64 as arch_x86;
use core::ptr;
#[cfg(feature = "use_std")]
pub mod ip_tool;
// rdtsc no fence
pub unsafe fn rdtsc_nofence() -> u64 {