Compare commits

...

4 Commits

Author SHA1 Message Date
Guillaume DIDIER
45e5fa84a5 Fix missing CPU classification 2024-07-03 09:42:17 +02:00
Guillaume DIDIER
ddd24e9c20 Add newer Core CPUs, up to Raptor Lake 2024-07-03 09:11:33 +02:00
Guillaume DIDIER
d98c9f5a66 Scan now displays the scores for each cbox using a one line format
Also remove the duplication between core 0 and core i, after validation that it behaved the same
2024-07-03 08:53:43 +02:00
Guillaume DIDIER
69fb1dc2db Implement on Core CPUs 2024-07-03 08:52:12 +02:00
3 changed files with 102 additions and 26 deletions

View File

@ -39,7 +39,17 @@ pub(crate) fn determine_cpu_class() -> Option<CpuClass> {
| (0x06, 0x4e) | (0x06, 0x4e)
| (0x06, 0x5e) | (0x06, 0x5e)
| (0x06, 0x8e) | (0x06, 0x8e)
| (0x06, 0x9e) => { | (0x06, 0x9e)
| (0x06, 0xa5)
| (0x06, 0x66)
| (0x06, 0x7e)
| (0x06, 0x8c)
| (0x06, 0x8d)
| (0x06, 0xa7)
| (0x06, 0x9a)
| (0x06, 0x97)
| (0x06, 0xba)
| (0x06, 0xb7) => {
Some(IntelCore) Some(IntelCore)
} }
_ => { _ => {
@ -90,7 +100,22 @@ pub(crate) fn get_performance_counters_core() -> Option<&'static CorePerfCounter
0x4e 0x4e
| 0x5e | 0x5e
| 0x8e | 0x8e
| 0x9e => Some(&SKYLAKE_KABYLAKE_CORE), | 0x9e
| 0xa5 => Some(&SKYLAKE_KABYLAKE_CORE),
0x66
| 0x7e
| 0x8c
| 0x8d => Some(&CANNON_LAKE_TO_TIGER_LAKE_CORE),
0xa7 => {
eprintln!("Rocket Lake may be like Skylake or like Ice Lake.");
eprintln!("You need to edit the code in arch.rs, and validate if the perf counters work like any of those two.");
eprintln!("For now assuming Ice Lake by default");
Some(&CANNON_LAKE_TO_TIGER_LAKE_CORE)
}
0x9a
| 0x97
| 0xba
| 0xb7 => Some(&ALDER_LAKE_TO_RAPTOR_LAKE_CORE),
_ => None, _ => None,
} }
} }
@ -198,24 +223,24 @@ const BROADWELL_XEON: XeonPerfCounters = XeonPerfCounters {
const ALDER_LAKE_TO_RAPTOR_LAKE_CORE: CorePerfCounters = CorePerfCounters { const ALDER_LAKE_TO_RAPTOR_LAKE_CORE: CorePerfCounters = CorePerfCounters {
max_slice: 10, max_slice: 10,
msr_unc_perf_global_ctr: 0x2ff0, msr_unc_perf_global_ctr: 0x2ff0,
val_enable_ctrs: 0, // TODO val_enable_ctrs: 0x20000000, // To validate
msr_unc_cbo_perfevtsel0: &[0x2000, 0x2008, 0x2010, 0x2018, 0x2020, 0x2028, 0x2030, 0x2038, 0x2040, 0x2048], msr_unc_cbo_perfevtsel0: &[0x2000, 0x2008, 0x2010, 0x2018, 0x2020, 0x2028, 0x2030, 0x2038, 0x2040, 0x2048],
msr_unc_cbo_per_ctr0: &[0x2002, 0x200a, 0x2012, 0x201a, 0x2022, 0x202a, 0x2032, 0x203a, 0x2042, 0x204a], msr_unc_cbo_per_ctr0: &[0x2002, 0x200a, 0x2012, 0x201a, 0x2022, 0x202a, 0x2032, 0x203a, 0x2042, 0x204a],
val_disable_ctrs: 0, // TODO val_disable_ctrs: 0x0, // To validate
val_select_evt_core: 0, // TODO val_select_evt_core: 0x408f34, // To validate
val_reset_ctrs: 0, // TODO val_reset_ctrs: 0x0, // To validate
}; };
// TODO verify this on ICELAKE, and appropriate values. Also deal with backport Cypress Cove ? // TODO verify this on ICELAKE, and appropriate values. Also deal with backport Cypress Cove ?
const CANNON_LAKE_TO_TIGER_LAKE_CORE: CorePerfCounters = CorePerfCounters { const CANNON_LAKE_TO_TIGER_LAKE_CORE: CorePerfCounters = CorePerfCounters {
max_slice: 8, // TODO max_slice: 8, // To validate
msr_unc_perf_global_ctr: 0xe01, msr_unc_perf_global_ctr: 0xe01,
val_enable_ctrs: 0, // TODO val_enable_ctrs: 0x20000000, // To validate
msr_unc_cbo_perfevtsel0: &[0x700, 0x708, 0x710, 0x718, 0x720, 0x728, 0x730, 0x738], msr_unc_cbo_perfevtsel0: &[0x700, 0x708, 0x710, 0x718, 0x720, 0x728, 0x730, 0x738],
msr_unc_cbo_per_ctr0: &[0x702, 0x70a, 0x712, 0x71a, 0x722, 0x72a, 0x732, 0x73a], msr_unc_cbo_per_ctr0: &[0x702, 0x70a, 0x712, 0x71a, 0x722, 0x72a, 0x732, 0x73a],
val_disable_ctrs: 0x0, // TODO val_disable_ctrs: 0x0, // To validate
val_select_evt_core: 0, // TODO val_select_evt_core: 0x408f34, // To validate
val_reset_ctrs: 0x0, // TODO val_reset_ctrs: 0x0, // To validate
}; };
const SKYLAKE_KABYLAKE_CORE: CorePerfCounters = CorePerfCounters { const SKYLAKE_KABYLAKE_CORE: CorePerfCounters = CorePerfCounters {
@ -229,13 +254,23 @@ const SKYLAKE_KABYLAKE_CORE: CorePerfCounters = CorePerfCounters {
val_reset_ctrs: 0x0, val_reset_ctrs: 0x0,
}; };
// This is documented in Intel SDM, 20.3.4.6 (in March 2024 edition)
const SANDYBRIDGE_TO_BROADWELL_CORE: CorePerfCounters = CorePerfCounters { const SANDYBRIDGE_TO_BROADWELL_CORE: CorePerfCounters = CorePerfCounters {
max_slice: 0, max_slice: 0,
msr_unc_perf_global_ctr: 0x391, msr_unc_perf_global_ctr: 0x391,
// Goes in MSR_UNC_PERF_GLOBAL_CTR: EN (bit 29) set to one, and PMI routed to cores 1-4 upon overflow.
val_enable_ctrs: 0x2000000f, val_enable_ctrs: 0x2000000f,
msr_unc_cbo_perfevtsel0: &[0x700, 0x710, 0x720, 0x730], msr_unc_cbo_perfevtsel0: &[0x700, 0x710, 0x720, 0x730],
msr_unc_cbo_per_ctr0: &[0x706, 0x716, 0x726, 0x736], msr_unc_cbo_per_ctr0: &[0x706, 0x716, 0x726, 0x736],
val_disable_ctrs: 0x0, val_disable_ctrs: 0x0,
// Counter Mask (bit 28-24) 0, Inv (23) 0, EN (22) 1, OVF (20) 0, E (18) 0,
// Unit Mask (bit 15-8) 0x8f, Event Select (bit 7-0) 0x34
// Event selection from https://perfmon-events.intel.com
// UNC_CBO_CACHE_LOOKUP.ANY_MESI
// L3 Lookup any request that access cache and found line in MESI-state. EventSel=34H UMask=8FH
// Counter=0,1
val_select_evt_core: 0x408f34, val_select_evt_core: 0x408f34,
// TODO
val_reset_ctrs: 0x0, val_reset_ctrs: 0x0,
}; };

View File

@ -1,4 +1,4 @@
use cache_slice::determine_slice; use cache_slice::monitor_address;
use cache_slice::utils::core_per_package; use cache_slice::utils::core_per_package;
use nix::sched::{sched_getaffinity, sched_setaffinity, CpuSet}; use nix::sched::{sched_getaffinity, sched_setaffinity, CpuSet};
use nix::unistd::Pid; use nix::unistd::Pid;
@ -23,28 +23,36 @@ pub fn main() {
let mut cpu_set = CpuSet::new(); let mut cpu_set = CpuSet::new();
cpu_set.set(core).unwrap(); cpu_set.set(core).unwrap();
sched_setaffinity(Pid::this(), &cpu_set).unwrap(); sched_setaffinity(Pid::this(), &cpu_set).unwrap();
for addr in target.iter() { for addr in target.iter().step_by(8) {
let slice = determine_slice(addr as *const u64 as *const u8, core as u8, nb_cores); let address = addr as *const u64 as *const u8;
let res = unsafe { monitor_address(address, core as u8, nb_cores) }.unwrap();
print!("({:2}) {:x}:", core, address as usize);
for slice in res {
print!(" {:6}", slice)
}
println!();
/*let slice = res.iter().enumerate().max_by_key(|(_i, val)| { **val });
match slice { match slice {
Some(slice) => { Some((slice, _)) => {
println!("({:2}) Slice for addr {:x}: {}", core, addr as *const u64 as usize, slice) println!("({:2}) Slice for addr {:x}: {}", core, addr as *const u64 as usize, slice)
} }
None => { None => {
eprintln!("({:2}) Failed to find slice for addr {:x}", core, addr as *const u64 as usize) eprintln!("({:2}) Failed to find slice for addr {:x}", core, addr as *const u64 as usize)
} }
}*/
} }
} /*for addr in target.iter() {
for addr in target.iter() { let res = unsafe { monitor_address(addr as *const u64 as *const u8, 0, nb_cores) }.unwrap();
let slice = determine_slice(addr as *const u64 as *const u8, 0, nb_cores); let slice = res.iter().enumerate().max_by_key(|(_i, val)| { **val });
match slice { match slice {
Some(slice) => { Some((slice, _)) => {
println!("({:2}) Slice for addr {:x}: {}", core, addr as *const u64 as usize, slice) println!("({:2}) Slice for addr {:x}: {}", 0, addr as *const u64 as usize, slice)
} }
None => { None => {
eprintln!("({:2}) Failed to find slice for addr {:x}", core, addr as *const u64 as usize) eprintln!("({:2}) Failed to find slice for addr {:x}", 0, addr as *const u64 as usize)
}
} }
} }
}*/
sched_setaffinity(Pid::this(), &old).unwrap(); sched_setaffinity(Pid::this(), &old).unwrap();
} }
} }

View File

@ -23,7 +23,7 @@ impl From<std::io::Error> for Error {
} }
} }
const NUM_POKE: usize = 10000; const NUM_POKE: usize = 100000;
unsafe fn poke(addr: *const u8) { unsafe fn poke(addr: *const u8) {
for _i in 0..NUM_POKE { for _i in 0..NUM_POKE {
@ -79,7 +79,7 @@ unsafe fn monitor_xeon(addr: *const u8, cpu: u8, max_cbox: usize) -> Result<Vec<
let mut results = Vec::new(); let mut results = Vec::new();
for i in 0..max_cbox { for i in 0..max_cbox {
let result = read_msr_on_cpu(performance_counters.msr_pmon_ctr0[i], cpu)?; let result = read_msr_on_cpu(performance_counters.msr_pmon_ctr0[i], cpu)?;
if (result as i64 - NUM_POKE as i64) < 0 { if result < NUM_POKE as u64 {
results.push(0); results.push(0);
} else { } else {
results.push(result - NUM_POKE as u64); results.push(result - NUM_POKE as u64);
@ -89,7 +89,7 @@ unsafe fn monitor_xeon(addr: *const u8, cpu: u8, max_cbox: usize) -> Result<Vec<
Ok(results) Ok(results)
} }
fn monitor_core(addr: *const u8, cpu: u8, max_cbox: usize) -> Result<Vec<u64>, Error> { fn monitor_core(addr: *const u8, cpu: u8, mut max_cbox: usize) -> Result<Vec<u64>, Error> {
// Note, we need to add the workaround for one missing perf counter here. // Note, we need to add the workaround for one missing perf counter here.
let performance_counters = if let Some(p) = get_performance_counters_core() { let performance_counters = if let Some(p) = get_performance_counters_core() {
p p
@ -98,6 +98,7 @@ fn monitor_core(addr: *const u8, cpu: u8, max_cbox: usize) -> Result<Vec<u64>, E
}; };
let workaround = if (performance_counters.max_slice as usize) + 1 == max_cbox { let workaround = if (performance_counters.max_slice as usize) + 1 == max_cbox {
max_cbox = performance_counters.max_slice as usize;
true true
} else if (performance_counters.max_slice as usize) >= max_cbox { } else if (performance_counters.max_slice as usize) >= max_cbox {
false false
@ -105,7 +106,39 @@ fn monitor_core(addr: *const u8, cpu: u8, max_cbox: usize) -> Result<Vec<u64>, E
return Err(Error::InvalidParameter); return Err(Error::InvalidParameter);
}; };
unimplemented!() write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_disable_ctrs)?;
for i in 0..max_cbox {
write_msr_on_cpu(performance_counters.msr_unc_cbo_per_ctr0[i], cpu, performance_counters.val_reset_ctrs)?;
}
for i in 0..max_cbox {
write_msr_on_cpu(performance_counters.msr_unc_cbo_perfevtsel0[i], cpu, performance_counters.val_select_evt_core)?;
}
write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_enable_ctrs)?;
unsafe { poke(addr) };
/*
// Commented out in original code : TODO, check if this makes any difference ?
write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_disable_ctrs)?;
*/
// Read counters
let mut results = Vec::new();
for i in 0..max_cbox {
let result = read_msr_on_cpu(performance_counters.msr_unc_cbo_per_ctr0[i], cpu)?;
if result < NUM_POKE as u64 {
results.push(0);
} else {
results.push(result - NUM_POKE as u64);
}
}
write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_disable_ctrs)?;
Ok(results)
} }
pub unsafe fn monitor_address(addr: *const u8, cpu: u8, max_cbox: u16) -> Result<Vec<u64>, Error> { pub unsafe fn monitor_address(addr: *const u8, cpu: u8, max_cbox: u16) -> Result<Vec<u64>, Error> {