Implement slice scanning on Xeon
This commit is contained in:
parent
1876dc7db4
commit
ba87550b65
@ -4,3 +4,5 @@ version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
raw-cpuid = "11.0.2"
|
||||
nix = "0.29.0"
|
||||
|
241
cache_slice/src/arch.rs
Normal file
241
cache_slice/src/arch.rs
Normal file
@ -0,0 +1,241 @@
|
||||
use raw_cpuid::CpuId;
|
||||
use crate::arch::CpuClass::{IntelCore, IntelXeon, IntelXeonSP};
|
||||
|
||||
/// Processor classes that the slice-monitoring code distinguishes between.
///
/// Values are produced by `determine_cpu_class` from the CPUID family/model
/// pair.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum CpuClass {
    /// Intel client ("Core") models.
    IntelCore,
    /// Intel Xeon models handled through the per-box uncore counter tables.
    IntelXeon,
    /// Intel Xeon Scalable models.
    IntelXeonSP,
    // Add further CPUs later on
}
|
||||
|
||||
pub(crate) fn determine_cpu_class() -> Option<CpuClass> {
|
||||
let cpuid = CpuId::new();
|
||||
let info = if let Some(info) = cpuid.get_feature_info() {
|
||||
info
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
|
||||
// Todo, sift through the documentation to add support for more CPUs
|
||||
match (info.family_id(), info.model_id()) {
|
||||
(0x06, 0x4f)
|
||||
| (0x06, 0x2d)
|
||||
| (0x06, 0x3e)
|
||||
| (06, 0x3f)
|
||||
| (0x06, 0x56) => {
|
||||
Some(IntelXeon)
|
||||
}
|
||||
(0x06, 0x55) => {
|
||||
Some(IntelXeonSP)
|
||||
}
|
||||
// 42, 58, 60, 69, 70, 61, 71, 78, 94, 142, 158
|
||||
(0x06, 0x2a)
|
||||
| (0x06, 0x3a)
|
||||
| (0x06, 0x3c)
|
||||
| (0x06, 0x45)
|
||||
| (0x06, 0x46)
|
||||
| (0x06, 0x3d)
|
||||
| (0x06, 0x47)
|
||||
| (0x06, 0x4e)
|
||||
| (0x06, 0x5e)
|
||||
| (0x06, 0x8e)
|
||||
| (0x06, 0x9e) => {
|
||||
Some(IntelCore)
|
||||
}
|
||||
_ => {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_performance_counters_xeon() -> Option<&'static XeonPerfCounters> {
|
||||
let cpuid = CpuId::new();
|
||||
let info = if let Some(info) = cpuid.get_feature_info() {
|
||||
info
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
if info.family_id() != 6 {
|
||||
return None;
|
||||
}
|
||||
match info.model_id() {
|
||||
0x2d /* 45 */ => Some(&SANDY_BRIDGE_XEON),
|
||||
0x3e /* 62 */ => Some(&IVY_BRIDGE_XEON),
|
||||
0x3f /* 63 */ => Some(&HASWELL_XEON),
|
||||
0x56 /* 86 */ => Some(&BROADWELL_XEON),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_performance_counters_core() -> Option<&'static CorePerfCounters> {
|
||||
let cpuid = CpuId::new();
|
||||
let info = if let Some(info) = cpuid.get_feature_info() {
|
||||
info
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
if info.family_id() != 6 {
|
||||
return None;
|
||||
}
|
||||
// TODO, review if the list can be extended to further CPUs
|
||||
// TODO, add post Cannon Lake stuff
|
||||
match info.model_id() {
|
||||
0x2a
|
||||
| 0x3a
|
||||
| 0x3c
|
||||
| 0x45
|
||||
| 0x46
|
||||
| 0x3d
|
||||
| 0x47 => Some(&SANDYBRIDGE_TO_BROADWELL_CORE),
|
||||
0x4e
|
||||
| 0x5e
|
||||
| 0x8e
|
||||
| 0x9e => Some(&SKYLAKE_KABYLAKE_CORE),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// MSR addresses and control values used to drive the per-slice uncore
/// counters on a Xeon processor.
///
/// Each address table holds one entry per slice, indexed by slice number.
#[derive(Debug)]
pub struct XeonPerfCounters {
    /// Largest number of slices this table can describe.
    pub max_slice: u16,
    /// Counter register of each slice; read back after a measurement.
    pub msr_pmon_ctr0: &'static [u64],
    /// Filter register of each slice; programmed with `val_filter`.
    pub msr_pmon_box_filter: &'static [u64],
    /// Event-control register of each slice; programmed with
    /// `val_enable_counting` and `val_select_event`.
    pub msr_pmon_ctl0: &'static [u64],
    /// Box-control register of each slice; programmed with `val_box_unfreeze`.
    pub msr_pmon_box_ctl: &'static [u64],
    /// Value that freezes the counters of a box.
    pub val_box_freeze: u64,
    /// Value that resets the counters of a box.
    pub val_box_reset: u64,
    /// Value that enables counting.
    pub val_enable_counting: u64,
    /// Value that selects the monitored event.
    pub val_select_event: u64,
    /// Value written to the filter register.
    pub val_filter: u64,
    /// Value that unfreezes the counters of a box.
    pub val_box_unfreeze: u64,
}
|
||||
|
||||
/// MSR addresses and control values for the uncore counters of a client
/// ("Core") processor.
///
/// NOTE(review): nothing visible consumes this type yet, so the field notes
/// below are inferred from the field names — confirm once `monitor_core` is
/// implemented.
#[derive(Debug)]
pub struct CorePerfCounters {
    /// Number of slices described by the tables below.
    pub max_slice: u16,
    /// Address of the global uncore counter control register.
    pub msr_unc_perf_global_ctr: u64,
    /// Value presumably written to the global control register to enable the counters.
    pub val_enable_ctrs: u64,
    /// Per-slice event-select register addresses.
    pub msr_unc_cbo_perfevtsel0: &'static [u64],
    /// Per-slice counter register addresses.
    pub msr_unc_cbo_per_ctr0: &'static [u64],
    /// Value presumably written to the global control register to disable the counters.
    pub val_disable_ctrs: u64,
    /// Value presumably written to an event-select register to choose the event.
    pub val_select_evt_core: u64,
    /// Value presumably used to reset the counters.
    pub val_reset_ctrs: u64,
}
|
||||
|
||||
/// Counter table returned for family 6, model 0x2d (8 slices).
const SANDY_BRIDGE_XEON: XeonPerfCounters = XeonPerfCounters {
    max_slice: 8,

    // Per-slice register addresses, one entry per slice.
    msr_pmon_box_ctl: &[
        0xd04, 0xd24, 0xd44, 0xd64, 0xd84, 0xda4, 0xdc4, 0xde4,
    ],
    msr_pmon_ctl0: &[
        0xd10, 0xd30, 0xd50, 0xd70, 0xd90, 0xdb0, 0xdd0, 0xdf0,
    ],
    msr_pmon_box_filter: &[
        0xd14, 0xd34, 0xd54, 0xd74, 0xd94, 0xdb4, 0xdd4, 0xdf4,
    ],
    msr_pmon_ctr0: &[
        0xd16, 0xd36, 0xd56, 0xd76, 0xd96, 0xdb6, 0xdd6, 0xdf6,
    ],

    // Control values.
    val_box_freeze: 0x10100,
    val_box_reset: 0x10103,
    val_box_unfreeze: 0x10000,
    val_enable_counting: 0x400000,
    val_select_event: 0x401134,
    val_filter: 0x7c0000,
};
|
||||
|
||||
/// Counter table returned for family 6, model 0x3e (15 slices).
const IVY_BRIDGE_XEON: XeonPerfCounters = XeonPerfCounters {
    max_slice: 15,

    // Per-slice register addresses, one entry per slice.
    msr_pmon_box_ctl: &[
        0xd04, 0xd24, 0xd44, 0xd64, 0xd84, 0xda4, 0xdc4, 0xde4,
        0xe04, 0xe24, 0xe44, 0xe64, 0xe84, 0xea4, 0xec4,
    ],
    msr_pmon_ctl0: &[
        0xd10, 0xd30, 0xd50, 0xd70, 0xd90, 0xdb0, 0xdd0, 0xdf0,
        0xe10, 0xe30, 0xe50, 0xe70, 0xe90, 0xeb0, 0xed0,
    ],
    msr_pmon_box_filter: &[
        0xd14, 0xd34, 0xd54, 0xd74, 0xd94, 0xdb4, 0xdd4, 0xdf4,
        0xe14, 0xe34, 0xe54, 0xe74, 0xe94, 0xeb4, 0xed4,
    ],
    msr_pmon_ctr0: &[
        0xd16, 0xd36, 0xd56, 0xd76, 0xd96, 0xdb6, 0xdd6, 0xdf6,
        0xe16, 0xe36, 0xe56, 0xe76, 0xe96, 0xeb6, 0xed6,
    ],

    // Control values.
    val_box_freeze: 0x30100,
    val_box_reset: 0x30103,
    val_box_unfreeze: 0x30000,
    val_enable_counting: 0x400000,
    val_select_event: 0x401134,
    val_filter: 0x7e0010,
};
|
||||
|
||||
/// Counter table returned for family 6, model 0x3f (18 slices).
const HASWELL_XEON: XeonPerfCounters = XeonPerfCounters {
    max_slice: 18,

    // Per-slice register addresses, one entry per slice.
    msr_pmon_box_ctl: &[
        0xe00, 0xe10, 0xe20, 0xe30, 0xe40, 0xe50, 0xe60, 0xe70, 0xe80,
        0xe90, 0xea0, 0xeb0, 0xec0, 0xed0, 0xee0, 0xef0, 0xf00, 0xf10,
    ],
    msr_pmon_ctl0: &[
        0xe01, 0xe11, 0xe21, 0xe31, 0xe41, 0xe51, 0xe61, 0xe71, 0xe81,
        0xe91, 0xea1, 0xeb1, 0xec1, 0xed1, 0xee1, 0xef1, 0xf01, 0xf11,
    ],
    msr_pmon_box_filter: &[
        0xe05, 0xe15, 0xe25, 0xe35, 0xe45, 0xe55, 0xe65, 0xe75, 0xe85,
        0xe95, 0xea5, 0xeb5, 0xec5, 0xed5, 0xee5, 0xef5, 0xf05, 0xf15,
    ],
    msr_pmon_ctr0: &[
        0xe08, 0xe18, 0xe28, 0xe38, 0xe48, 0xe58, 0xe68, 0xe78, 0xe88,
        0xe98, 0xea8, 0xeb8, 0xec8, 0xed8, 0xee8, 0xef8, 0xf08, 0xf18,
    ],

    // Control values.
    val_box_freeze: 0x30100,
    val_box_reset: 0x30103,
    val_box_unfreeze: 0x30000,
    val_enable_counting: 0x400000,
    val_select_event: 0x401134,
    val_filter: 0x7e0020,
};
|
||||
|
||||
/// Counter table returned for family 6, model 0x56 (24 slices).
const BROADWELL_XEON: XeonPerfCounters = XeonPerfCounters {
    max_slice: 24,

    // Per-slice register addresses, one entry per slice.
    msr_pmon_box_ctl: &[
        0xe00, 0xe10, 0xe20, 0xe30, 0xe40, 0xe50, 0xe60, 0xe70,
        0xe80, 0xe90, 0xea0, 0xeb0, 0xec0, 0xed0, 0xee0, 0xef0,
        0xf00, 0xf10, 0xf20, 0xf30, 0xf40, 0xf50, 0xf60, 0xf70,
    ],
    msr_pmon_ctl0: &[
        0xe01, 0xe11, 0xe21, 0xe31, 0xe41, 0xe51, 0xe61, 0xe71,
        0xe81, 0xe91, 0xea1, 0xeb1, 0xec1, 0xed1, 0xee1, 0xef1,
        0xf01, 0xf11, 0xf21, 0xf31, 0xf41, 0xf51, 0xf61, 0xf71,
    ],
    msr_pmon_box_filter: &[
        0xe05, 0xe15, 0xe25, 0xe35, 0xe45, 0xe55, 0xe65, 0xe75,
        0xe85, 0xe95, 0xea5, 0xeb5, 0xec5, 0xed5, 0xee5, 0xef5,
        0xf05, 0xf15, 0xf25, 0xf35, 0xf45, 0xf55, 0xf65, 0xf75,
    ],
    msr_pmon_ctr0: &[
        0xe08, 0xe18, 0xe28, 0xe38, 0xe48, 0xe58, 0xe68, 0xe78,
        0xe88, 0xe98, 0xea8, 0xeb8, 0xec8, 0xed8, 0xee8, 0xef8,
        0xf08, 0xf18, 0xf28, 0xf38, 0xf48, 0xf58, 0xf68, 0xf78,
    ],

    // Control values.
    val_box_freeze: 0x30100,
    val_box_reset: 0x30103,
    val_box_unfreeze: 0x30000,
    val_enable_counting: 0x400000,
    val_select_event: 0x401134,
    val_filter: 0xfe0020,
};
|
||||
|
||||
// TODO find appropriate values
|
||||
const ALDER_LAKE_TO_RAPTOR_LAKE_CORE: CorePerfCounters = CorePerfCounters {
|
||||
max_slice: 10,
|
||||
msr_unc_perf_global_ctr: 0x2ff0,
|
||||
val_enable_ctrs: 0, // TODO
|
||||
msr_unc_cbo_perfevtsel0: &[0x2000, 0x2008, 0x2010, 0x2018, 0x2020, 0x2028, 0x2030, 0x2038, 0x2040, 0x2048],
|
||||
msr_unc_cbo_per_ctr0: &[0x2002, 0x200a, 0x2012, 0x201a, 0x2022, 0x202a, 0x2032, 0x203a, 0x2042, 0x204a],
|
||||
val_disable_ctrs: 0, // TODO
|
||||
val_select_evt_core: 0, // TODO
|
||||
val_reset_ctrs: 0, // TODO
|
||||
};
|
||||
|
||||
// TODO verify his on ICELAKE, and appropriate values. Also deal with backport Cypress Cove ?
|
||||
const CANNON_LAKE_TO_TIGER_LAKE_CORE: CorePerfCounters = CorePerfCounters {
|
||||
max_slice: 8, // TODO
|
||||
msr_unc_perf_global_ctr: 0xe01,
|
||||
val_enable_ctrs: 0, // TODO
|
||||
msr_unc_cbo_perfevtsel0: &[0x700, 0x708, 0x710, 0x718, 0x720, 0x728, 0x730, 0x738],
|
||||
msr_unc_cbo_per_ctr0: &[0x702, 0x70a, 0x712, 0x71a, 0x722, 0x72a, 0x732, 0x73a],
|
||||
val_disable_ctrs: 0x0, // TODO
|
||||
val_select_evt_core: 0, // TODO
|
||||
val_reset_ctrs: 0x0, // TODO
|
||||
};
|
||||
|
||||
/// Counter table returned for family 6, models 0x4e, 0x5e, 0x8e and 0x9e
/// (7 slices).
const SKYLAKE_KABYLAKE_CORE: CorePerfCounters = CorePerfCounters {
    max_slice: 7,

    // Register addresses.
    msr_unc_perf_global_ctr: 0xe01,
    msr_unc_cbo_perfevtsel0: &[
        0x700, 0x710, 0x720, 0x730, 0x740, 0x750, 0x760,
    ],
    msr_unc_cbo_per_ctr0: &[
        0x706, 0x716, 0x726, 0x736, 0x746, 0x756, 0x766,
    ],

    // Control values.
    val_enable_ctrs: 0x20000000,
    val_disable_ctrs: 0x0,
    val_select_evt_core: 0x408f34,
    val_reset_ctrs: 0x0,
};
|
||||
|
||||
/// Counter table returned for family 6, models 0x2a, 0x3a, 0x3c, 0x45, 0x46,
/// 0x3d and 0x47 (4 slices).
const SANDYBRIDGE_TO_BROADWELL_CORE: CorePerfCounters = CorePerfCounters {
    // Matches the four entries of the register tables below, as `max_slice`
    // does in every other table; it was previously 0, which describes no
    // slice at all.
    max_slice: 4,
    msr_unc_perf_global_ctr: 0x391,
    val_enable_ctrs: 0x2000000f,
    msr_unc_cbo_perfevtsel0: &[0x700, 0x710, 0x720, 0x730],
    msr_unc_cbo_per_ctr0: &[0x706, 0x716, 0x726, 0x736],
    val_disable_ctrs: 0x0,
    val_select_evt_core: 0x408f34,
    val_reset_ctrs: 0x0,
};
|
37
cache_slice/src/bin/scan.rs
Normal file
37
cache_slice/src/bin/scan.rs
Normal file
@ -0,0 +1,37 @@
|
||||
use cache_slice::monitor_address;
|
||||
use cache_slice::utils::core_per_package;
|
||||
use nix::sched::{sched_getaffinity, CpuSet};
|
||||
|
||||
|
||||
pub fn main() {
|
||||
let nb_cores = core_per_package();
|
||||
println!("Found {} cores", nb_cores);
|
||||
|
||||
let target = vec![0x0123456789abcdefu64, 64];
|
||||
for core in 0..CpuSet::count() {
|
||||
for addr in target.iter() {
|
||||
let res = unsafe { monitor_address(addr as *const u64 as *const u8, core as u8, nb_cores) };
|
||||
let slice = res.iter().enumerate().max_by_key(|(i, val)| { val });
|
||||
match slice {
|
||||
Some((slice, _)) => {
|
||||
println!("({:2}) Slice for addr {:x}: {}", core, addr as *const u64 as usize, slice)
|
||||
}
|
||||
None => {
|
||||
eprintln!("({:2}) Failed to find slice for addr {:x}", core, addr as *const u64 as usize)
|
||||
}
|
||||
}
|
||||
}
|
||||
for addr in target.iter() {
|
||||
let res = unsafe { monitor_address(addr as *const u64 as *const u8, 0, nb_cores) };
|
||||
let slice = res.iter().enumerate().max_by_key(|(i, val)| { val });
|
||||
match slice {
|
||||
Some((slice, _)) => {
|
||||
println!("({:2}) Slice for addr {:x}: {}", 0, addr as *const u64 as usize, slice)
|
||||
}
|
||||
None => {
|
||||
eprintln!("({:2}) Failed to find slice for addr {:x}", 0, addr as *const u64 as usize)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,7 +1,109 @@
|
||||
pub mod msr;
|
||||
#![deny(unsafe_op_in_unsafe_fn)]
|
||||
|
||||
pub fn add(left: u64, right: u64) -> u64 {
|
||||
left + right
|
||||
use std::arch::x86_64::_mm_clflush;
|
||||
use crate::arch::CpuClass::{IntelCore, IntelXeon, IntelXeonSP};
|
||||
use crate::arch::get_performance_counters_xeon;
|
||||
use crate::Error::UnsupportedCPU;
|
||||
use crate::msr::{read_msr_on_cpu, write_msr_on_cpu};
|
||||
|
||||
pub mod msr;
|
||||
pub mod utils;
|
||||
mod arch;
|
||||
|
||||
/// Errors reported by the slice-monitoring API.
#[derive(Debug)]
pub enum Error {
    /// The running processor has no usable counter description.
    UnsupportedCPU,
    /// A caller-supplied argument is out of range for the running processor.
    InvalidParameter,
    /// An underlying I/O operation failed.
    IO(std::io::Error),
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::UnsupportedCPU => f.write_str("unsupported CPU"),
            Error::InvalidParameter => f.write_str("invalid parameter"),
            Error::IO(source) => write!(f, "I/O error: {}", source),
        }
    }
}

impl std::error::Error for Error {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::IO(source) => Some(source),
            Error::UnsupportedCPU | Error::InvalidParameter => None,
        }
    }
}
|
||||
|
||||
impl From<std::io::Error> for Error {
|
||||
fn from(value: std::io::Error) -> Self {
|
||||
Error::IO(value)
|
||||
}
|
||||
}
|
||||
|
||||
/// Number of flushes issued by one call to `poke`.
const NUM_POKE: usize = 10000;

/// Flushes the cache line containing `addr`, `NUM_POKE` times in a row.
///
/// # Safety
///
/// NOTE(review): presumably `addr` must point into memory mapped in the
/// calling process — confirm against the `_mm_clflush` contract.
unsafe fn poke(addr: *const u8) {
    let mut remaining = NUM_POKE;
    while remaining > 0 {
        // SAFETY: forwarded from this function's own contract on `addr`.
        unsafe { _mm_clflush(addr) };
        remaining -= 1;
    }
}
|
||||
|
||||
unsafe fn monitor_xeon(addr: *const u8, cpu: u8, max_cbox: usize) -> Result<Vec<u32>, Error> {
|
||||
let performance_counters = if let Some(p) = get_performance_counters_xeon() {
|
||||
p
|
||||
} else {
|
||||
return Err(UnsupportedCPU);
|
||||
};
|
||||
|
||||
if (performance_counters.max_slice as usize) < max_cbox {
|
||||
return Err(Error::InvalidParameter);
|
||||
}
|
||||
|
||||
// Freeze counters
|
||||
for i in 0..max_cbox {
|
||||
write_msr_on_cpu(performance_counters.msr_pmon_ctr0[i], cpu, performance_counters.val_box_freeze)?;
|
||||
}
|
||||
|
||||
// Reset counters
|
||||
for i in 0..max_cbox {
|
||||
write_msr_on_cpu(performance_counters.msr_pmon_ctl0[i], cpu, performance_counters.val_box_reset)?;
|
||||
}
|
||||
|
||||
// Enable counting
|
||||
for i in 0..max_cbox {
|
||||
write_msr_on_cpu(performance_counters.msr_pmon_ctl0[i], cpu, performance_counters.val_enable_counting)?;
|
||||
}
|
||||
|
||||
// Select event
|
||||
for i in 0..max_cbox {
|
||||
write_msr_on_cpu(performance_counters.msr_pmon_ctl0[i], cpu, performance_counters.val_select_event)?;
|
||||
write_msr_on_cpu(performance_counters.msr_pmon_box_filter[i], cpu, performance_counters.val_filter)?;
|
||||
}
|
||||
|
||||
// Unfreeze
|
||||
for i in 0..max_cbox {
|
||||
write_msr_on_cpu(performance_counters.msr_pmon_box_ctl[i], cpu, performance_counters.val_box_unfreeze)?;
|
||||
}
|
||||
|
||||
unsafe { poke(addr) };
|
||||
|
||||
// Freeze counters
|
||||
for i in 0..max_cbox {
|
||||
write_msr_on_cpu(performance_counters.msr_pmon_ctr0[i], cpu, performance_counters.val_box_freeze)?;
|
||||
}
|
||||
|
||||
// Read counters
|
||||
let mut result = Vec::new();
|
||||
for i in 0..max_cbox {
|
||||
let result = read_msr_on_cpu(performance_counters.msr_pmon_ctr0[i], cpu)?;
|
||||
result.push(result)
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn monitor_core(addr: *const u8, cpu: u8, max_core: u8) -> Result<Vec<u32>, Error> {
|
||||
// Note, we need to add the workaround for one missing perf counter here.
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
pub unsafe fn monitor_address(addr: *const u8, cpu: u8, max_cbox: u16) -> Result<Vec<u32>, Error> {
|
||||
match arch::determine_cpu_class() {
|
||||
Some(IntelCore) => {
|
||||
unimplemented!()
|
||||
}
|
||||
Some(IntelXeon) => {
|
||||
unsafe { monitor_xeon(addr, cpu, max_cbox as usize) }
|
||||
}
|
||||
Some(IntelXeonSP) => { // TODO
|
||||
Err(UnsupportedCPU)
|
||||
}
|
||||
None => {
|
||||
Err(UnsupportedCPU)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@ -10,7 +112,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn it_works() {
|
||||
let result = add(2, 2);
|
||||
assert_eq!(result, 4);
|
||||
let result = 2;
|
||||
assert_eq!(result, 2);
|
||||
}
|
||||
}
|
||||
|
@ -2,35 +2,35 @@ use core::mem::size_of;
|
||||
use std::format;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::os::unix::fs::FileExt;
|
||||
use std::io::{Result, Error, ErrorKind};
|
||||
use std::io::{Result, Error};
|
||||
|
||||
/// Writes `value` to the model-specific register `msr` of logical CPU `cpu`.
///
/// The write goes through `/dev/cpu/<cpu>/msr`, using the register number as
/// the file offset.
///
/// # Errors
///
/// Returns an error when the device cannot be opened (for instance when not
/// running as root), when the write fails, or when fewer than eight bytes
/// were written.
pub fn write_msr_on_cpu(msr: u64, cpu: u8, value: u64) -> Result<()> {
    let path = format!("/dev/cpu/{}/msr", cpu);
    // Report an unopenable device to the caller instead of panicking.
    let file: File = OpenOptions::new().write(true).open(&path).map_err(|e| {
        Error::new(
            e.kind(),
            format!("Failed to open MSR ({}), are you running as root ? ({})", path, e),
        )
    })?;

    let written = file.write_at(&value.to_ne_bytes(), msr)?;
    if written == size_of::<u64>() {
        Ok(())
    } else {
        Err(Error::other("Failed to write complete value"))
    }
}
|
||||
|
||||
/// Reads the model-specific register `msr` of logical CPU `cpu`.
///
/// The read goes through `/dev/cpu/<cpu>/msr`, using the register number as
/// the file offset.
///
/// # Errors
///
/// Returns an error when the device cannot be opened (for instance when not
/// running as root), when the read fails, or when fewer than eight bytes
/// were read.
pub fn read_msr_on_cpu(msr: u64, cpu: u8) -> Result<u64> {
    let path = format!("/dev/cpu/{}/msr", cpu);
    // Report an unopenable device to the caller instead of panicking.
    let file: File = OpenOptions::new().read(true).open(&path).map_err(|e| {
        Error::new(
            e.kind(),
            format!("Failed to open MSR ({}), are you running as root ? ({})", path, e),
        )
    })?;

    let mut read_data = [0u8; size_of::<u64>()];
    let size = file.read_at(&mut read_data, msr)?;
    if size == size_of::<u64>() {
        Ok(u64::from_ne_bytes(read_data))
    } else {
        Err(Error::other("Failed to read complete value"))
    }
}
|
||||
|
55
cache_slice/src/utils.rs
Normal file
55
cache_slice/src/utils.rs
Normal file
@ -0,0 +1,55 @@
|
||||
use raw_cpuid::{CpuId, CpuIdReaderNative, ExtendedTopologyIter, TopologyType};
|
||||
|
||||
fn get_topology_iterator() -> ExtendedTopologyIter<CpuIdReaderNative> {
|
||||
let cpuid = CpuId::new();
|
||||
let topology_iter = if let Some(t) = cpuid.get_extended_topology_info_v2() {
|
||||
t
|
||||
} else if let Some(t) = cpuid.get_extended_topology_info() {
|
||||
t
|
||||
} else {
|
||||
panic!("Unsupported CPU");
|
||||
};
|
||||
topology_iter
|
||||
}
|
||||
|
||||
pub fn threads_per_package() -> Option<u16> {
|
||||
let topology_iter = get_topology_iterator();
|
||||
let mut t_per_package = None;
|
||||
for level in topology_iter {
|
||||
if let Some(t_per_package) = t_per_package {
|
||||
assert!(t_per_package <= level.processors())
|
||||
}
|
||||
t_per_package = Some(level.processors())
|
||||
}
|
||||
t_per_package
|
||||
}
|
||||
|
||||
pub fn core_per_package() -> u16 {
|
||||
let topology_iter = get_topology_iterator();
|
||||
let mut t_per_core = None;
|
||||
let mut t_per_package = None;
|
||||
for level in topology_iter {
|
||||
//println!("{:?}", level);
|
||||
match level.level_type() {
|
||||
TopologyType::SMT => {
|
||||
assert_eq!(t_per_core, None);
|
||||
t_per_core = Some(level.processors());
|
||||
}
|
||||
_ => { // TODO identify the right level ?
|
||||
if let Some(t_per_package) = t_per_package {
|
||||
assert!(t_per_package <= level.processors())
|
||||
}
|
||||
// Or change the API to enable the user to specify the topology level to use according to the CPU micro-arch.
|
||||
t_per_package = Some(level.processors())
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(t_per_core) = t_per_core {
|
||||
if let Some(t_per_package) = t_per_package {
|
||||
if t_per_package % t_per_core == 0 {
|
||||
return t_per_package / t_per_core;
|
||||
}
|
||||
}
|
||||
}
|
||||
0
|
||||
}
|
Loading…
Reference in New Issue
Block a user