Compare commits
5 Commits
c16a503828
...
3791c1f6f3
Author | SHA1 | Date | |
---|---|---|---|
3791c1f6f3 | |||
|
23cf8eaee6 | ||
|
123a434006 | ||
|
a7f1840bb9 | ||
|
a937debf7c |
@ -135,6 +135,7 @@ pub struct XeonPerfCounters {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct CorePerfCounters {
|
pub struct CorePerfCounters {
|
||||||
|
pub msr_unc_cbo_config: u64,
|
||||||
pub max_slice: u16,
|
pub max_slice: u16,
|
||||||
pub msr_unc_perf_global_ctr: u64,
|
pub msr_unc_perf_global_ctr: u64,
|
||||||
pub val_enable_ctrs: u64,
|
pub val_enable_ctrs: u64,
|
||||||
@ -221,6 +222,7 @@ const BROADWELL_XEON: XeonPerfCounters = XeonPerfCounters {
|
|||||||
|
|
||||||
// TODO find appropriate values
|
// TODO find appropriate values
|
||||||
const ALDER_LAKE_TO_RAPTOR_LAKE_CORE: CorePerfCounters = CorePerfCounters {
|
const ALDER_LAKE_TO_RAPTOR_LAKE_CORE: CorePerfCounters = CorePerfCounters {
|
||||||
|
msr_unc_cbo_config: 0x396,
|
||||||
max_slice: 10,
|
max_slice: 10,
|
||||||
msr_unc_perf_global_ctr: 0x2ff0,
|
msr_unc_perf_global_ctr: 0x2ff0,
|
||||||
val_enable_ctrs: 0x20000000, // To validate
|
val_enable_ctrs: 0x20000000, // To validate
|
||||||
@ -233,6 +235,7 @@ const ALDER_LAKE_TO_RAPTOR_LAKE_CORE: CorePerfCounters = CorePerfCounters {
|
|||||||
|
|
||||||
// TODO verify this on ICELAKE, and appropriate values. Also deal with backport Cypress Cove ?
|
// TODO verify this on ICELAKE, and appropriate values. Also deal with backport Cypress Cove ?
|
||||||
const CANNON_LAKE_TO_TIGER_LAKE_CORE: CorePerfCounters = CorePerfCounters {
|
const CANNON_LAKE_TO_TIGER_LAKE_CORE: CorePerfCounters = CorePerfCounters {
|
||||||
|
msr_unc_cbo_config: 0x396,
|
||||||
max_slice: 8, // To validate
|
max_slice: 8, // To validate
|
||||||
msr_unc_perf_global_ctr: 0xe01,
|
msr_unc_perf_global_ctr: 0xe01,
|
||||||
val_enable_ctrs: 0x20000000, // To validate
|
val_enable_ctrs: 0x20000000, // To validate
|
||||||
@ -244,6 +247,7 @@ const CANNON_LAKE_TO_TIGER_LAKE_CORE: CorePerfCounters = CorePerfCounters {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const SKYLAKE_KABYLAKE_CORE: CorePerfCounters = CorePerfCounters {
|
const SKYLAKE_KABYLAKE_CORE: CorePerfCounters = CorePerfCounters {
|
||||||
|
msr_unc_cbo_config: 0x396,
|
||||||
max_slice: 7,
|
max_slice: 7,
|
||||||
msr_unc_perf_global_ctr: 0xe01,
|
msr_unc_perf_global_ctr: 0xe01,
|
||||||
val_enable_ctrs: 0x20000000,
|
val_enable_ctrs: 0x20000000,
|
||||||
@ -257,7 +261,8 @@ const SKYLAKE_KABYLAKE_CORE: CorePerfCounters = CorePerfCounters {
|
|||||||
// This is documented in Intel SDM, 20.3.4.6 (in March 2024 edition)
|
// This is documented in Intel SDM, 20.3.4.6 (in March 2024 edition)
|
||||||
|
|
||||||
const SANDYBRIDGE_TO_BROADWELL_CORE: CorePerfCounters = CorePerfCounters {
|
const SANDYBRIDGE_TO_BROADWELL_CORE: CorePerfCounters = CorePerfCounters {
|
||||||
max_slice: 0,
|
msr_unc_cbo_config: 0x396,
|
||||||
|
max_slice: 4,
|
||||||
msr_unc_perf_global_ctr: 0x391,
|
msr_unc_perf_global_ctr: 0x391,
|
||||||
// Goes in MSR_UNC_PERF_GLOBAL_CTR: EN (bit 29) is set to one, and the PMI is routed to cores 1-4 upon overflow.
|
// Goes in MSR_UNC_PERF_GLOBAL_CTR: EN (bit 29) is set to one, and the PMI is routed to cores 1-4 upon overflow.
|
||||||
val_enable_ctrs: 0x2000000f,
|
val_enable_ctrs: 0x2000000f,
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
use std::arch::x86_64::_mm_clflush;
|
use std::arch::x86_64::_mm_clflush;
|
||||||
use crate::arch::CpuClass::{IntelCore, IntelXeon, IntelXeonSP};
|
use crate::arch::CpuClass::{IntelCore, IntelXeon, IntelXeonSP};
|
||||||
use crate::arch::{get_performance_counters_core, get_performance_counters_xeon};
|
use crate::arch::{get_performance_counters_core, get_performance_counters_xeon};
|
||||||
use crate::Error::UnsupportedCPU;
|
use crate::Error::{InvalidParameter, UnsupportedCPU};
|
||||||
use crate::msr::{read_msr_on_cpu, write_msr_on_cpu};
|
use crate::msr::{read_msr_on_cpu, write_msr_on_cpu};
|
||||||
|
|
||||||
pub mod msr;
|
pub mod msr;
|
||||||
@ -39,7 +39,7 @@ unsafe fn monitor_xeon(addr: *const u8, cpu: u8, max_cbox: usize) -> Result<Vec<
|
|||||||
};
|
};
|
||||||
|
|
||||||
if (performance_counters.max_slice as usize) < max_cbox {
|
if (performance_counters.max_slice as usize) < max_cbox {
|
||||||
return Err(Error::InvalidParameter);
|
return Err(InvalidParameter);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Freeze counters
|
// Freeze counters
|
||||||
@ -89,33 +89,44 @@ unsafe fn monitor_xeon(addr: *const u8, cpu: u8, max_cbox: usize) -> Result<Vec<
|
|||||||
Ok(results)
|
Ok(results)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn monitor_core(addr: *const u8, cpu: u8, mut max_cbox: usize) -> Result<Vec<u64>, Error> {
|
fn monitor_core(addr: *const u8, cpu: u8) -> Result<Vec<u64>, Error> {
|
||||||
// Note, we need to add the workaround for one missing perf counter here.
|
// Note, we need to add the workaround for one missing perf counter here.
|
||||||
let performance_counters = if let Some(p) = get_performance_counters_core() {
|
let performance_counters = if let Some(p) = get_performance_counters_core() {
|
||||||
p
|
p
|
||||||
} else {
|
} else {
|
||||||
return Err(UnsupportedCPU);
|
return Err(UnsupportedCPU);
|
||||||
};
|
};
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
eprint!("Finding the number of CBox available... ");
|
||||||
|
let max_cbox = (read_msr_on_cpu(performance_counters.msr_unc_cbo_config, cpu)? & 0xF) as usize; // TODO magic number (mask for bit 3:0)
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
eprintln!("{}", max_cbox);
|
||||||
|
|
||||||
let workaround = if (performance_counters.max_slice as usize) + 1 == max_cbox {
|
if max_cbox > performance_counters.max_slice as usize {
|
||||||
max_cbox = performance_counters.max_slice as usize;
|
return Err(InvalidParameter);
|
||||||
true
|
|
||||||
} else if (performance_counters.max_slice as usize) >= max_cbox {
|
|
||||||
false
|
|
||||||
} else {
|
|
||||||
return Err(Error::InvalidParameter);
|
|
||||||
};
|
|
||||||
|
|
||||||
write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_disable_ctrs)?;
|
|
||||||
|
|
||||||
for i in 0..max_cbox {
|
|
||||||
write_msr_on_cpu(performance_counters.msr_unc_cbo_per_ctr0[i], cpu, performance_counters.val_reset_ctrs)?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
eprintln!("Disabling counters");
|
||||||
|
write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_disable_ctrs)?;
|
||||||
|
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
eprint!("Resetting counters...");
|
||||||
|
for i in 0..max_cbox {
|
||||||
|
eprint!(" {i}");
|
||||||
|
write_msr_on_cpu(performance_counters.msr_unc_cbo_per_ctr0[i], cpu, performance_counters.val_reset_ctrs)?;
|
||||||
|
}
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
eprintln!(" ok");
|
||||||
|
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
eprintln!("Selecting events");
|
||||||
for i in 0..max_cbox {
|
for i in 0..max_cbox {
|
||||||
write_msr_on_cpu(performance_counters.msr_unc_cbo_perfevtsel0[i], cpu, performance_counters.val_select_evt_core)?;
|
write_msr_on_cpu(performance_counters.msr_unc_cbo_perfevtsel0[i], cpu, performance_counters.val_select_evt_core)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
eprintln!("enabling counters");
|
||||||
write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_enable_ctrs)?;
|
write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_enable_ctrs)?;
|
||||||
|
|
||||||
unsafe { poke(addr) };
|
unsafe { poke(addr) };
|
||||||
@ -126,6 +137,8 @@ fn monitor_core(addr: *const u8, cpu: u8, mut max_cbox: usize) -> Result<Vec<u64
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
// Read counters
|
// Read counters
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
eprintln!("Gathering results");
|
||||||
let mut results = Vec::new();
|
let mut results = Vec::new();
|
||||||
for i in 0..max_cbox {
|
for i in 0..max_cbox {
|
||||||
let result = read_msr_on_cpu(performance_counters.msr_unc_cbo_per_ctr0[i], cpu)?;
|
let result = read_msr_on_cpu(performance_counters.msr_unc_cbo_per_ctr0[i], cpu)?;
|
||||||
@ -136,15 +149,18 @@ fn monitor_core(addr: *const u8, cpu: u8, mut max_cbox: usize) -> Result<Vec<u64
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
eprintln!("disabling counters again");
|
||||||
write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_disable_ctrs)?;
|
write_msr_on_cpu(performance_counters.msr_unc_perf_global_ctr, cpu, performance_counters.val_disable_ctrs)?;
|
||||||
|
|
||||||
Ok(results)
|
Ok(results)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Note: max_cbox is not used on Intel Core.
|
||||||
pub unsafe fn monitor_address(addr: *const u8, cpu: u8, max_cbox: u16) -> Result<Vec<u64>, Error> {
|
pub unsafe fn monitor_address(addr: *const u8, cpu: u8, max_cbox: u16) -> Result<Vec<u64>, Error> {
|
||||||
match arch::determine_cpu_class() {
|
match arch::determine_cpu_class() {
|
||||||
Some(IntelCore) => {
|
Some(IntelCore) => {
|
||||||
unsafe { monitor_core(addr, cpu, max_cbox as usize) }
|
unsafe { monitor_core(addr, cpu) }
|
||||||
}
|
}
|
||||||
Some(IntelXeon) => {
|
Some(IntelXeon) => {
|
||||||
unsafe { monitor_xeon(addr, cpu, max_cbox as usize) }
|
unsafe { monitor_xeon(addr, cpu, max_cbox as usize) }
|
||||||
|
Loading…
Reference in New Issue
Block a user