diff --git a/cache_slice/src/arch.rs b/cache_slice/src/arch.rs index 987186d..7c1eaf7 100644 --- a/cache_slice/src/arch.rs +++ b/cache_slice/src/arch.rs @@ -198,24 +198,24 @@ const BROADWELL_XEON: XeonPerfCounters = XeonPerfCounters { const ALDER_LAKE_TO_RAPTOR_LAKE_CORE: CorePerfCounters = CorePerfCounters { max_slice: 10, msr_unc_perf_global_ctr: 0x2ff0, - val_enable_ctrs: 0, // TODO + val_enable_ctrs: 0x20000000, // To validate msr_unc_cbo_perfevtsel0: &[0x2000, 0x2008, 0x2010, 0x2018, 0x2020, 0x2028, 0x2030, 0x2038, 0x2040, 0x2048], msr_unc_cbo_per_ctr0: &[0x2002, 0x200a, 0x2012, 0x201a, 0x2022, 0x202a, 0x2032, 0x203a, 0x2042, 0x204a], - val_disable_ctrs: 0, // TODO - val_select_evt_core: 0, // TODO - val_reset_ctrs: 0, // TODO + val_disable_ctrs: 0x0, // To validate + val_select_evt_core: 0x408f34, // To validate + val_reset_ctrs: 0x0, // To validate }; // TODO verify his on ICELAKE, and appropriate values. Also deal with backport Cypress Cove ? const CANNON_LAKE_TO_TIGER_LAKE_CORE: CorePerfCounters = CorePerfCounters { - max_slice: 8, // TODO + max_slice: 8, // To validate msr_unc_perf_global_ctr: 0xe01, - val_enable_ctrs: 0, // TODO + val_enable_ctrs: 0x20000000, // To validate msr_unc_cbo_perfevtsel0: &[0x700, 0x708, 0x710, 0x718, 0x720, 0x728, 0x730, 0x738], msr_unc_cbo_per_ctr0: &[0x702, 0x70a, 0x712, 0x71a, 0x722, 0x72a, 0x732, 0x73a], - val_disable_ctrs: 0x0, // TODO - val_select_evt_core: 0, // TODO - val_reset_ctrs: 0x0, // TODO + val_disable_ctrs: 0x0, // To validate + val_select_evt_core: 0x408f34, // To validate + val_reset_ctrs: 0x0, // To validate }; const SKYLAKE_KABYLAKE_CORE: CorePerfCounters = CorePerfCounters { @@ -229,13 +229,23 @@ const SKYLAKE_KABYLAKE_CORE: CorePerfCounters = CorePerfCounters { val_reset_ctrs: 0x0, }; +// This is documented in Intel SDM, 20.3.4.6 (in March 2024 edition) + const SANDYBRIDGE_TO_BROADWELL_CORE: CorePerfCounters = CorePerfCounters { max_slice: 0, msr_unc_perf_global_ctr: 0x391, + // Go in MSR_UNC_PERF_GLOBAL_CTR EN (bit 29) set to one, and route PMI to core 1-4 upon overflow. val_enable_ctrs: 0x2000000f, msr_unc_cbo_perfevtsel0: &[0x700, 0x710, 0x720, 0x730], msr_unc_cbo_per_ctr0: &[0x706, 0x716, 0x726, 0x736], val_disable_ctrs: 0x0, + // Counter Mask (bit 28-24) 0, Inv (23) 0, EN (22) 1, OVF (20) 0, E (18) 0, + // Unit Mask (bit 15-8) 0x8f, Event Select (bit 7-0) 0x34 + // Event selection from https://perfmon-events.intel.com + // UNC_CBO_CACHE_LOOKUP.ANY_MESI + // L3 Lookup any request that access cache and found line in MESI-state. EventSel=34H UMask=8FH + // Counter=0,1 val_select_evt_core: 0x408f34, + // TODO val_reset_ctrs: 0x0, }; diff --git a/cache_slice/src/lib.rs b/cache_slice/src/lib.rs index 8c443fa..4dbfb73 100644 --- a/cache_slice/src/lib.rs +++ b/cache_slice/src/lib.rs @@ -23,7 +23,7 @@ impl From for Error { } } -const NUM_POKE: usize = 10000; +const NUM_POKE: usize = 100000; unsafe fn poke(addr: *const u8) { for _i in 0..NUM_POKE { @@ -79,7 +79,7 @@ unsafe fn monitor_xeon(addr: *const u8, cpu: u8, max_cbox: usize) -> Result Result Result, Error> { +fn monitor_core(addr: *const u8, cpu: u8, mut max_cbox: usize) -> Result, Error> { // Note, we need to add the workaround for one missing perf counter here. let performance_counters = if let Some(p) = get_performance_counters_core() { p @@ -98,6 +98,7 @@ fn monitor_core(addr: *const u8, cpu: u8, max_cbox: usize) -> Result, E }; let workaround = if (performance_counters.max_slice as usize) + 1 == max_cbox { + max_cbox = performance_counters.max_slice as usize; true } else if (performance_counters.max_slice as usize) >= max_cbox { false @@ -105,7 +106,39 @@ fn monitor_core(addr: *const u8, cpu: u8, max_cbox: usize) -> Result, E return Err(Error::InvalidParameter); }; - unimplemented!() + write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_disable_ctrs)?; + + for i in 0..max_cbox { + write_msr_on_cpu(performance_counters.msr_unc_cbo_per_ctr0[i], cpu, performance_counters.val_reset_ctrs)?; + } + + for i in 0..max_cbox { + write_msr_on_cpu(performance_counters.msr_unc_cbo_perfevtsel0[i], cpu, performance_counters.val_select_evt_core)?; + } + + write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_enable_ctrs)?; + + unsafe { poke(addr) }; + + /* + // Commented out in original code : TODO, check if this makes any difference ? + write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_disable_ctrs)?; + + */ + // Read counters + let mut results = Vec::new(); + for i in 0..max_cbox { + let result = read_msr_on_cpu(performance_counters.msr_unc_cbo_per_ctr0[i], cpu)?; + if result < NUM_POKE as u64 { + results.push(0); + } else { + results.push(result - NUM_POKE as u64); + } + } + + write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_disable_ctrs)?; + + Ok(results) } pub unsafe fn monitor_address(addr: *const u8, cpu: u8, max_cbox: u16) -> Result, Error> {