diff --git a/cache_utils/Cargo.toml b/cache_utils/Cargo.toml index 110e4b7..c05b938 100644 --- a/cache_utils/Cargo.toml +++ b/cache_utils/Cargo.toml @@ -19,6 +19,8 @@ nix = { version = "0.20.0", optional = true } libc = { version = "0.2.92", optional = true } hashbrown = { version = "0.11.2", optional = true } turn_lock = { path = "../turn_lock", optional = true} +lazy_static = "1.4.0" +bitvec = "0.22.3" [features] use_std = ["nix", "itertools/use_std", "libc", "cpuid/use_std", "turn_lock"] diff --git a/cache_utils/src/bin/cache_level_cal.rs b/cache_utils/src/bin/cache_level_cal.rs index a7a8d88..d7ec654 100644 --- a/cache_utils/src/bin/cache_level_cal.rs +++ b/cache_utils/src/bin/cache_level_cal.rs @@ -14,6 +14,7 @@ use nix::unistd::Pid; use core::arch::x86_64 as arch_x86; +use cache_utils::ip_tool::Function; use core::cmp::min; use std::cmp::Ordering; use std::collections::HashMap; @@ -53,6 +54,53 @@ use std::str::from_utf8; - [ ] Make the plots */ +unsafe fn function_call(f: &Function, addr: *const u8) -> u64 { + unsafe { (f.fun)(addr) } +} + +unsafe fn prepare_RAM(p: *const u8) { + unsafe { flush(p) }; +} + +unsafe fn prepare_pL3(p: *const u8) { + unsafe { maccess(p) }; + unsafe { arch_x86::_mm_mfence() }; + unsafe { flush(p) }; + unsafe { arch_x86::_mm_mfence() }; + unsafe { arch_x86::_mm_prefetch::<{ arch_x86::_MM_HINT_T2 }>(p as *const i8) }; + unsafe { arch_x86::__cpuid_count(0, 0) }; +} + +unsafe fn prepare_pL2(p: *const u8) { + unsafe { maccess(p) }; + unsafe { arch_x86::_mm_mfence() }; + unsafe { flush(p) }; + unsafe { arch_x86::_mm_mfence() }; + unsafe { arch_x86::_mm_prefetch::<{ arch_x86::_MM_HINT_T1 }>(p as *const i8) }; + unsafe { arch_x86::__cpuid_count(0, 0) }; +} + +unsafe fn prepare_pL1(p: *const u8) { + unsafe { maccess(p) }; + unsafe { arch_x86::_mm_mfence() }; + unsafe { flush(p) }; + unsafe { arch_x86::_mm_mfence() }; + unsafe { arch_x86::_mm_prefetch::<{ arch_x86::_MM_HINT_T0 }>(p as *const i8) }; + unsafe { arch_x86::__cpuid_count(0, 0) }; 
+} + +unsafe fn prepare_L1(p: *const u8) { + unsafe { only_reload(p) }; +} + +unsafe fn prepare_eL2(p: *const u8) { + unimplemented!() +} + +unsafe fn prepare_eL3(p: *const u8) { + unimplemented!() +} + unsafe fn multiple_access(p: *const u8) { unsafe { maccess::<u8>(p); @@ -105,26 +153,11 @@ struct Threshold { pub num_false_miss: u32, } -unsafe fn only_flush_wrap(_: &(), addr: *const u8) -> u64 { - unsafe { only_flush(addr) } -} - -unsafe fn only_reload_wrap(_: &(), addr: *const u8) -> u64 { - unsafe { only_reload(addr) } -} - -unsafe fn load_and_flush_wrap(_: &(), addr: *const u8) -> u64 { - unsafe { load_and_flush(addr) } -} -unsafe fn flush_and_reload_wrap(_: &(), addr: *const u8) -> u64 { - unsafe { flush_and_reload(addr) } -} - -unsafe fn reload_and_flush_wrap(_: &(), addr: *const u8) -> u64 { - unsafe { reload_and_flush(addr) } -} - fn main() { + let measure_reload = + cache_utils::ip_tool::Function::try_new(1, 0, cache_utils::ip_tool::TIMED_MACCESS).unwrap(); + let measure_nop = + cache_utils::ip_tool::Function::try_new(1, 0, cache_utils::ip_tool::TIMED_NOP).unwrap(); // Grab a slice of memory let core_per_socket_out = Command::new("sh") @@ -172,13 +205,50 @@ fn main() { panic!("not aligned nicely"); } - let operations = [CalibrateOperation2T { - prepare: maccess::<u8>, - op: only_flush_wrap, - name: "clflush_remote_hit", - display_name: "clflush remote hit", - t: &(), - }]; + let operations = [ + CalibrateOperation2T { + prepare: prepare_RAM, + op: function_call, + name: "RAM_load", + display_name: "Load from RAM", + t: &measure_reload, + }, + CalibrateOperation2T { + prepare: prepare_pL3, + op: function_call, + name: "pL3_load", + display_name: "Load from L3 (prefetch)", + t: &measure_reload, + }, + CalibrateOperation2T { + prepare: prepare_pL2, + op: function_call, + name: "pL2_load", + display_name: "Load from L2 (prefetch)", + t: &measure_reload, + }, + CalibrateOperation2T { + prepare: prepare_pL1, + op: function_call, + name: "pL1_load", + display_name: 
"Load from L1 (prefetch)", + t: &measure_reload, + }, + CalibrateOperation2T { + prepare: prepare_L1, + op: function_call, + name: "L1_load", + display_name: "Load from L1 (Reload)", + t: &measure_reload, + }, + CalibrateOperation2T { + prepare: noop::<u8>, + op: function_call, + name: "nop", + display_name: "No memory access (nop)", + t: &measure_nop, + }, + ]; let r = unsafe { calibrate_fixed_freq_2_thread( diff --git a/prefetcher_reverse/src/ip_tool.rs b/cache_utils/src/ip_tool.rs similarity index 92% rename from prefetcher_reverse/src/ip_tool.rs rename to cache_utils/src/ip_tool.rs index e0cc893..6674c07 100644 --- a/prefetcher_reverse/src/ip_tool.rs +++ b/cache_utils/src/ip_tool.rs @@ -1,9 +1,10 @@ +use crate::mmap::MMappedMemory; use bitvec::prelude::*; -use cache_utils::mmap::MMappedMemory; use lazy_static::lazy_static; use std::collections::LinkedList; use std::ptr::copy_nonoverlapping; use std::sync::Mutex; +use std::vec::Vec; struct WXRange { start: usize, @@ -56,6 +57,12 @@ pub const TIMED_CLFLUSH: FunctionTemplate = FunctionTemplate { end: timed_clflush_template_end as *const u8, }; +pub const TIMED_NOP: FunctionTemplate = FunctionTemplate { + start: timed_nop_template, + ip: timed_nop_template_ip as *const u8, + end: timed_nop_template_end as *const u8, +}; + impl WXRange { unsafe fn allocate( &mut self, @@ -274,6 +281,31 @@ global_asm!( ".global timed_clflush_template_end", "timed_clflush_template_end:", "nop", + ".global timed_nop_template", + "timed_nop_template:", + "mfence", + "lfence", + "rdtsc", + "shl rdx, 32", + "mov rsi, rdx", + "add rsi, rax", + "mfence", + "lfence", + ".global timed_nop_template_ip", + "timed_nop_template_ip:", + "nop", + "mfence", + "lfence", + "rdtsc", + "shl rdx, 32", + "add rax, rdx", + "mfence", + "lfence", + "sub rax, rsi", + "ret", + ".global timed_nop_template_end", + "timed_nop_template_end:", + "nop", ); extern "C" { @@ -283,6 +315,9 @@ extern "C" { fn timed_clflush_template(pointer: *const u8) -> u64; fn 
timed_clflush_template_ip(); fn timed_clflush_template_end(); + fn timed_nop_template(pointer: *const u8) -> u64; + fn timed_nop_template_ip(); + fn timed_nop_template_end(); } pub fn tmp_test() { diff --git a/cache_utils/src/lib.rs b/cache_utils/src/lib.rs index 8ec4180..d892938 100644 --- a/cache_utils/src/lib.rs +++ b/cache_utils/src/lib.rs @@ -1,8 +1,13 @@ #![cfg_attr(feature = "no_std", no_std)] #![feature(ptr_internals)] +#![feature(linked_list_cursors)] +#![feature(global_asm)] #![allow(clippy::missing_safety_doc)] #![deny(unsafe_op_in_unsafe_fn)] +use core::arch::x86_64 as arch_x86; +use core::ptr; + use static_assertions::assert_cfg; assert_cfg!( @@ -25,8 +30,8 @@ pub mod frequency; #[cfg(feature = "use_std")] mod calibrate_2t; -use core::arch::x86_64 as arch_x86; -use core::ptr; +#[cfg(feature = "use_std")] +pub mod ip_tool; // rdtsc no fence pub unsafe fn rdtsc_nofence() -> u64 {