diff --git a/cache_slice/src/arch.rs b/cache_slice/src/arch.rs index d9e6bf3..e8e3a8b 100644 --- a/cache_slice/src/arch.rs +++ b/cache_slice/src/arch.rs @@ -135,6 +135,7 @@ pub struct XeonPerfCounters { } pub struct CorePerfCounters { + pub msr_unc_cbo_config: u64, pub max_slice: u16, pub msr_unc_perf_global_ctr: u64, pub val_enable_ctrs: u64, @@ -221,6 +222,7 @@ const BROADWELL_XEON: XeonPerfCounters = XeonPerfCounters { // TODO find appropriate values const ALDER_LAKE_TO_RAPTOR_LAKE_CORE: CorePerfCounters = CorePerfCounters { + msr_unc_cbo_config: 0x396, max_slice: 10, msr_unc_perf_global_ctr: 0x2ff0, val_enable_ctrs: 0x20000000, // To validate @@ -233,6 +235,7 @@ const ALDER_LAKE_TO_RAPTOR_LAKE_CORE: CorePerfCounters = CorePerfCounters { // TODO verify his on ICELAKE, and appropriate values. Also deal with backport Cypress Cove ? const CANNON_LAKE_TO_TIGER_LAKE_CORE: CorePerfCounters = CorePerfCounters { + msr_unc_cbo_config: 0x396, max_slice: 8, // To validate msr_unc_perf_global_ctr: 0xe01, val_enable_ctrs: 0x20000000, // To validate @@ -244,6 +247,7 @@ const CANNON_LAKE_TO_TIGER_LAKE_CORE: CorePerfCounters = CorePerfCounters { }; const SKYLAKE_KABYLAKE_CORE: CorePerfCounters = CorePerfCounters { + msr_unc_cbo_config: 0x396, max_slice: 7, msr_unc_perf_global_ctr: 0xe01, val_enable_ctrs: 0x20000000, @@ -257,7 +261,8 @@ const SKYLAKE_KABYLAKE_CORE: CorePerfCounters = CorePerfCounters { // This is documented in Intel SDM, 20.3.4.6 (in March 2024 edition) const SANDYBRIDGE_TO_BROADWELL_CORE: CorePerfCounters = CorePerfCounters { - max_slice: 0, + msr_unc_cbo_config: 0x396, + max_slice: 4, msr_unc_perf_global_ctr: 0x391, // Go in MSR_UNC_PERF_GLOBAL_CTR EN (bit 29) set to one, and route PMI to core 1-4 upon overflow. val_enable_ctrs: 0x2000000f, diff --git a/cache_slice/src/lib.rs b/cache_slice/src/lib.rs index 4dbfb73..828fd3a 100644 --- a/cache_slice/src/lib.rs +++ b/cache_slice/src/lib.rs @@ -3,7 +3,7 @@ use std::arch::x86_64::_mm_clflush; use crate::arch::CpuClass::{IntelCore, IntelXeon, IntelXeonSP}; use crate::arch::{get_performance_counters_core, get_performance_counters_xeon}; -use crate::Error::UnsupportedCPU; +use crate::Error::{InvalidParameter, UnsupportedCPU}; use crate::msr::{read_msr_on_cpu, write_msr_on_cpu}; pub mod msr; @@ -39,7 +39,7 @@ unsafe fn monitor_xeon(addr: *const u8, cpu: u8, max_cbox: usize) -> Result Result Result, Error> { +fn monitor_core(addr: *const u8, cpu: u8) -> Result, Error> { // Note, we need to add the workaround for one missing perf counter here. let performance_counters = if let Some(p) = get_performance_counters_core() { p } else { return Err(UnsupportedCPU); }; + #[cfg(debug_assertions)] + eprint!("Finding the number of CBox available... "); + let max_cbox = (read_msr_on_cpu(performance_counters.msr_unc_cbo_config, cpu)? & 0xF) as usize; // TODO magic number (mask for bit 3:0) + #[cfg(debug_assertions)] + eprintln!("{}", max_cbox); - let workaround = if (performance_counters.max_slice as usize) + 1 == max_cbox { - max_cbox = performance_counters.max_slice as usize; - true - } else if (performance_counters.max_slice as usize) >= max_cbox { - false - } else { - return Err(Error::InvalidParameter); - }; - - write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_disable_ctrs)?; - - for i in 0..max_cbox { - write_msr_on_cpu(performance_counters.msr_unc_cbo_per_ctr0[i], cpu, performance_counters.val_reset_ctrs)?; + if max_cbox > performance_counters.max_slice as usize { + return Err(InvalidParameter); } + #[cfg(debug_assertions)] + eprintln!("Disabling counters"); + write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_disable_ctrs)?; + + #[cfg(debug_assertions)] + eprint!("Resetting counters..."); + for i in 0..max_cbox { + eprint!(" {i}"); + write_msr_on_cpu(performance_counters.msr_unc_cbo_per_ctr0[i], cpu, performance_counters.val_reset_ctrs)?; + } + #[cfg(debug_assertions)] + eprintln!(" ok"); + + #[cfg(debug_assertions)] + eprintln!("Selecting events"); for i in 0..max_cbox { write_msr_on_cpu(performance_counters.msr_unc_cbo_perfevtsel0[i], cpu, performance_counters.val_select_evt_core)?; } + #[cfg(debug_assertions)] + eprintln!("enabling counters"); write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_enable_ctrs)?; unsafe { poke(addr) }; @@ -126,6 +137,8 @@ fn monitor_core(addr: *const u8, cpu: u8, mut max_cbox: usize) -> Result Result Result, Error> { match arch::determine_cpu_class() { Some(IntelCore) => { - unsafe { monitor_core(addr, cpu, max_cbox as usize) } + unsafe { monitor_core(addr, cpu) } } Some(IntelXeon) => { unsafe { monitor_xeon(addr, cpu, max_cbox as usize) }