From 84eee25e5a60f87a02b019df06b287f83ab8cf44 Mon Sep 17 00:00:00 2001 From: Guillume DIDIER Date: Wed, 24 Nov 2021 11:52:23 +0100 Subject: [PATCH] Add prefetcher experiments --- prefetcher_reverse/src/bin/new_page.rs | 13 ++- prefetcher_reverse/src/bin/rpa_patterns.rs | 13 ++- prefetcher_reverse/src/bin/stream.rs | 93 ++++++++++++++++++++-- prefetcher_reverse/src/bin/strides.rs | 13 ++- prefetcher_reverse/src/ip_tool.rs | 15 ++-- prefetcher_reverse/src/lib.rs | 63 +++++++++------ prefetcher_reverse/src/main.rs | 23 +++--- 7 files changed, 177 insertions(+), 56 deletions(-) diff --git a/prefetcher_reverse/src/bin/new_page.rs b/prefetcher_reverse/src/bin/new_page.rs index 2fc63a5..823e6e0 100644 --- a/prefetcher_reverse/src/bin/new_page.rs +++ b/prefetcher_reverse/src/bin/new_page.rs @@ -1,18 +1,23 @@ -use prefetcher_reverse::{Prober, PAGE_CACHELINE_LEN}; +use prefetcher_reverse::ip_tool::{Function, TIMED_MACCESS}; +use prefetcher_reverse::{pattern_helper, Prober, PAGE_CACHELINE_LEN}; pub const NUM_ITERATION: usize = 1 << 10; -fn exp(delay: u64) { +fn exp(delay: u64, reload: &Function) { let mut prober = Prober::<2>::new(63).unwrap(); prober.set_delay(delay); let pattern = (0usize..(PAGE_CACHELINE_LEN * 2usize)).collect::>(); - let result = prober.full_page_probe(pattern, NUM_ITERATION as u32, 100); + let p = pattern_helper(pattern, reload); + + let result = prober.full_page_probe(p, NUM_ITERATION as u32, 100); println!("{}", result); } fn main() { + let reload = Function::try_new(1, 0, TIMED_MACCESS).unwrap(); + for delay in [0, 5, 10, 50] { println!("Delay after each access: {} us", delay); - exp(delay); + exp(delay, &reload); } } diff --git a/prefetcher_reverse/src/bin/rpa_patterns.rs b/prefetcher_reverse/src/bin/rpa_patterns.rs index b819578..d5fdeed 100644 --- a/prefetcher_reverse/src/bin/rpa_patterns.rs +++ b/prefetcher_reverse/src/bin/rpa_patterns.rs @@ -10,7 +10,10 @@ use cache_utils::mmap; use cache_utils::mmap::MMappedMemory; use flush_flush::{FFHandle, FFPrimitives, FlushAndFlush}; use nix::Error; -use prefetcher_reverse::{reference_patterns, Prober, CACHE_LINE_LEN, PAGE_CACHELINE_LEN}; +use prefetcher_reverse::ip_tool::{Function, TIMED_MACCESS}; +use prefetcher_reverse::{ + pattern_helper, reference_patterns, Prober, CACHE_LINE_LEN, PAGE_CACHELINE_LEN, +}; use rand::seq::SliceRandom; use std::iter::Cycle; use std::ops::Range; @@ -18,19 +21,21 @@ use std::ops::Range; pub const NUM_ITERATION: usize = 1 << 10; pub const NUM_PAGES: usize = 256; -fn exp(delay: u64) { +fn exp(delay: u64, reload: &Function) { for (name, pattern) in reference_patterns() { + let p = pattern_helper(pattern, reload); let mut prober = Prober::<1>::new(63).unwrap(); println!("{}", name); - let result = prober.full_page_probe(pattern, NUM_ITERATION as u32, 100); + let result = prober.full_page_probe(p, NUM_ITERATION as u32, 100); println!("{}", result); } } fn main() { + let reload = Function::try_new(1, 0, TIMED_MACCESS).unwrap(); for delay in [0, 5, 10, 50] { println!("Delay after each access: {} us", delay); - exp(delay); + exp(delay, &reload); } } diff --git a/prefetcher_reverse/src/bin/stream.rs b/prefetcher_reverse/src/bin/stream.rs index 75d944b..d6d92b0 100644 --- a/prefetcher_reverse/src/bin/stream.rs +++ b/prefetcher_reverse/src/bin/stream.rs @@ -1,8 +1,41 @@ -use prefetcher_reverse::{Prober, PAGE_CACHELINE_LEN}; +use cache_utils::{flush, maccess}; +use prefetcher_reverse::ip_tool::{Function, TIMED_MACCESS}; +use prefetcher_reverse::{pattern_helper, Prober, PAGE_CACHELINE_LEN}; +use std::arch::x86_64 as arch_x86; pub const NUM_ITERATION: usize = 1 << 10; -fn exp(stride: usize, num_steps: i32, delay: u64) { +unsafe extern "C" fn prefetch_l2(p: *const u8) -> u64 { + maccess(p); + arch_x86::_mm_mfence(); + flush(p); + arch_x86::_mm_mfence(); + arch_x86::_mm_prefetch::<{ arch_x86::_MM_HINT_T1 }>(p as *const i8); + arch_x86::__cpuid_count(0, 0); + 0 +} + +unsafe extern "C" fn prefetch_l3(p: *const u8) -> u64 { + maccess(p); + arch_x86::_mm_mfence(); + flush(p); + arch_x86::_mm_mfence(); + arch_x86::_mm_prefetch::<{ arch_x86::_MM_HINT_T2 }>(p as *const i8); + arch_x86::__cpuid_count(0, 0); + 0 +} + +unsafe extern "C" fn prefetch_l1(p: *const u8) -> u64 { + maccess(p); + arch_x86::_mm_mfence(); + flush(p); + arch_x86::_mm_mfence(); + arch_x86::_mm_prefetch::<{ arch_x86::_MM_HINT_T0 }>(p as *const i8); + arch_x86::__cpuid_count(0, 0); + 0 +} + +fn exp(stride: usize, num_steps: i32, delay: u64, reload: &Function) { let mut prober = Prober::<2>::new(63).unwrap(); prober.set_delay(delay); let limit = if num_steps < 0 { @@ -11,23 +44,71 @@ fn exp(stride: usize, num_steps: i32, delay: u64) { stride * num_steps as usize }; let pattern = (2usize..limit).step_by(stride).collect::>(); - let result = prober.full_page_probe(pattern, NUM_ITERATION as u32, 100); + let p = pattern_helper(pattern, reload); + + let pl2 = Function { + fun: prefetch_l2, + ip: prefetch_l2 as *const u8, + end: prefetch_l2 as *const u8, + size: 0, + }; + + let pl3 = Function { + fun: prefetch_l3, + ip: prefetch_l3 as *const u8, + end: prefetch_l3 as *const u8, + size: 0, + }; + + let pl1 = Function { + fun: prefetch_l1, + ip: prefetch_l1 as *const u8, + end: prefetch_l1 as *const u8, + size: 0, + }; + + let mut pattern_pl2 = pattern_helper((0..(2 * PAGE_CACHELINE_LEN)).collect(), &pl2); + pattern_pl2.extend(p.iter().cloned()); + + let mut pattern_pl3 = pattern_helper((0..(2 * PAGE_CACHELINE_LEN)).collect(), &pl3); + pattern_pl3.extend(p.iter().cloned()); + + let mut pattern_pl1 = pattern_helper((0..(2 * PAGE_CACHELINE_LEN)).collect(), &pl1); + pattern_pl1.extend(p.iter().cloned()); + + println!("With no sw prefetch"); + let result = prober.full_page_probe(p, NUM_ITERATION as u32, 100); + println!("{}", result); + println!("With L2 sw prefetch"); + let result = prober.full_page_probe(pattern_pl2, NUM_ITERATION as u32, 100); + println!("{}", result); + println!("With L3 sw prefetch"); + let result = prober.full_page_probe(pattern_pl3, NUM_ITERATION as u32, 100); + println!("{}", result); + println!("With L1 sw prefetch"); + let result = prober.full_page_probe(pattern_pl1, NUM_ITERATION as u32, 100); println!("{}", result); } fn main() { - for stride in [3, 4] { + //let reload = Function::try_new(1, 0, TIMED_MACCESS).unwrap(); + let mut reloads = Vec::new(); + for i in 0..3 { + reloads.push(Function::try_new(4, i, TIMED_MACCESS).unwrap()); + } + for (index, stride) in [2, 3, 4].iter().enumerate() { + let reload = &reloads[index]; for delay_shift in [5, 12] { let limit = ((PAGE_CACHELINE_LEN + 32) / stride) as i32; //for num_steps in -1..limit { let num_steps = limit; println!( "Stride: {}, Limit: {}, Delay: {}", - stride, + *stride, num_steps, 1 << delay_shift ); - exp(stride, num_steps, 1 << delay_shift); + exp(*stride, num_steps, 1 << delay_shift, &reload); //} } } diff --git a/prefetcher_reverse/src/bin/strides.rs b/prefetcher_reverse/src/bin/strides.rs index f2c6a49..5dab7aa 100644 --- a/prefetcher_reverse/src/bin/strides.rs +++ b/prefetcher_reverse/src/bin/strides.rs @@ -1,8 +1,9 @@ -use prefetcher_reverse::{Prober, PAGE_CACHELINE_LEN}; +use prefetcher_reverse::ip_tool::{Function, TIMED_MACCESS}; +use prefetcher_reverse::{pattern_helper, Prober, PAGE_CACHELINE_LEN}; pub const NUM_ITERATION: usize = 1 << 10; -fn exp(stride: usize, num_steps: i32, delay: u64) { +fn exp(stride: usize, num_steps: i32, delay: u64, reload: &Function) { let mut prober = Prober::<2>::new(63).unwrap(); prober.set_delay(delay); let limit = if num_steps < 0 { @@ -11,11 +12,15 @@ fn exp(stride: usize, num_steps: i32, delay: u64) { stride * num_steps as usize }; let pattern = (2usize..limit).step_by(stride).collect::>(); - let result = prober.full_page_probe(pattern, NUM_ITERATION as u32, 100); + let p = pattern_helper(pattern, reload); + + let result = prober.full_page_probe(p, NUM_ITERATION as u32, 100); println!("{}", result); } fn main() { + let reload = Function::try_new(1, 0, TIMED_MACCESS).unwrap(); + for stride in [5, 7, 8] { for delay_shift in [5, 12, 20] { //let stride = 8; @@ -28,7 +33,7 @@ fn main() { num_steps, 1 << delay_shift ); - exp(stride, num_steps, 1 << delay_shift); + exp(stride, num_steps, 1 << delay_shift, &reload); //} } } diff --git a/prefetcher_reverse/src/ip_tool.rs b/prefetcher_reverse/src/ip_tool.rs index 04baffc..e0cc893 100644 --- a/prefetcher_reverse/src/ip_tool.rs +++ b/prefetcher_reverse/src/ip_tool.rs @@ -32,22 +32,25 @@ pub struct FunctionTemplate { end: *const u8, } +// Note those fields should not be public +// We need a way to also take care of non allocated functions. +#[derive(Debug)] pub struct Function { - fun: unsafe extern "C" fn(*const u8) -> u64, - ip: *const u8, - end: *const u8, - size: usize, + pub fun: unsafe extern "C" fn(*const u8) -> u64, + pub ip: *const u8, + pub end: *const u8, + pub size: usize, } lazy_static! { static ref wx_allocator: Mutex = Mutex::new(WXAllocator::new()); } -const TIMED_MACCESS: FunctionTemplate = FunctionTemplate { +pub const TIMED_MACCESS: FunctionTemplate = FunctionTemplate { start: timed_maccess_template, ip: timed_maccess_template_ip as *const u8, end: timed_maccess_template_end as *const u8, }; -const TIMED_CLFLUSH: FunctionTemplate = FunctionTemplate { +pub const TIMED_CLFLUSH: FunctionTemplate = FunctionTemplate { start: timed_clflush_template, ip: timed_clflush_template_ip as *const u8, end: timed_clflush_template_end as *const u8, diff --git a/prefetcher_reverse/src/lib.rs b/prefetcher_reverse/src/lib.rs index 1227345..215f85a 100644 --- a/prefetcher_reverse/src/lib.rs +++ b/prefetcher_reverse/src/lib.rs @@ -26,6 +26,8 @@ use std::{thread, time}; pub mod ip_tool; +use ip_tool::Function; + // NB these may need to be changed / dynamically measured. pub const CACHE_LINE_LEN: usize = 64; pub const PAGE_CACHELINE_LEN: usize = PAGE_LEN / CACHE_LINE_LEN; @@ -58,9 +60,15 @@ pub enum ProbeType { FullFlush, } +#[derive(Debug, Clone)] +pub struct PatternAccess<'a> { + pub function: &'a Function, + pub offset: usize, +} + #[derive(Debug)] -pub struct ProbePattern { - pub pattern: Vec, +pub struct ProbePattern<'a> { + pub pattern: Vec>, pub probe: Probe, } @@ -105,8 +113,8 @@ pub struct DualProbeResult { } #[derive(Debug)] -pub struct FullPageDualProbeResults { - pub pattern: Vec, +pub struct FullPageDualProbeResults<'a> { + pub pattern: Vec>, pub num_iteration: u32, pub single_probe_results: Vec, pub full_flush_results: DPRItem, @@ -120,8 +128,8 @@ pub struct SingleProbeResult { } #[derive(Debug)] -pub struct FullPageSingleProbeResult { - pub pattern: Vec, +pub struct FullPageSingleProbeResult<'a, const GS: usize> { + pub pattern: Vec>, pub probe_type: ProbeType, pub num_iteration: u32, pub results: Vec, @@ -243,15 +251,14 @@ impl Prober { unsafe { self.ff_channel.prepare(&mut ff_handles) }; let mut pattern_res = vec![0; pattern.pattern.len()]; - for (i, offset) in pattern.pattern.iter().enumerate() { - let h = &mut self.fr_handles[page_index][*offset]; - pattern_res[i] = unsafe { self.fr_channel.test_debug(h, false) }.unwrap().1; + for (i, access) in pattern.pattern.iter().enumerate() { + let h = &mut self.fr_handles[page_index][access.offset]; + let pointer: *const u8 = h.to_const_u8_pointer(); + pattern_res[i] = unsafe { (access.function.fun)(pointer) }; + // TODO IP : This is where the pattern access need to be done using pattern.function instead. + //pattern_res[i] = unsafe { self.fr_channel.test_debug(h, false) }.unwrap().1; delay(self.delay); - /*if self.delay > 0 { - thread::sleep(time::Duration::from_nanos(self.delay)); // FIXME parameter magic - }*/ - //pattern_res[i] = unsafe { self.fr_channel.test_single(h, false) }.unwrap() - //pattern_res[i] = Miss; + //pattern_res[i] = unsafe { self.fr_channel.test_single(h, false) }.unwrap(); //unsafe { only_reload(h.to_const_u8_pointer()) }; } @@ -329,13 +336,13 @@ impl Prober { result } - fn full_page_probe_helper( + fn full_page_probe_helper<'a>( &mut self, - pattern: &mut ProbePattern, + pattern: &mut ProbePattern<'a>, probe_type: ProbeType, num_iteration: u32, warmup: u32, - ) -> FullPageSingleProbeResult { + ) -> FullPageSingleProbeResult<'a, GS> { let mut result = FullPageSingleProbeResult { pattern: pattern.pattern.clone(), probe_type, @@ -362,12 +369,12 @@ impl Prober { result } - pub fn full_page_probe( + pub fn full_page_probe<'a>( &mut self, - pattern: Vec, + pattern: Vec>, num_iteration: u32, warmup: u32, - ) -> FullPageDualProbeResults { + ) -> FullPageDualProbeResults<'a> { let mut probe_pattern = ProbePattern { pattern: pattern, probe: Probe::FullFlush, @@ -417,13 +424,13 @@ impl Prober { } } -impl Display for FullPageDualProbeResults { +impl<'a> Display for FullPageDualProbeResults<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let mut indices = vec![None; self.single_probe_results.len()]; let pat_len = self.pattern.len(); let divider = (self.single_probe_results.len() * self.num_iteration as usize) as f32; - for (i, &offset) in self.pattern.iter().enumerate() { - indices[offset] = Some(i); + for (i, access) in self.pattern.iter().enumerate() { + indices[access.offset] = Some(i); } // Display header let mut r = writeln!( @@ -529,3 +536,13 @@ pub fn reference_patterns() -> [(&'static str, Vec); 9] { ("Pattern 5 (IV)", vec![0, 1, 2, 63, 62, 61, 19]), ] } + +pub fn pattern_helper<'a>(offsets: Vec, function: &'a Function) -> Vec> { + offsets + .into_iter() + .map(|i| PatternAccess { + function, + offset: i, + }) + .collect() +} diff --git a/prefetcher_reverse/src/main.rs b/prefetcher_reverse/src/main.rs index b6f4d42..174e11d 100644 --- a/prefetcher_reverse/src/main.rs +++ b/prefetcher_reverse/src/main.rs @@ -10,7 +10,10 @@ use cache_utils::mmap; use cache_utils::mmap::MMappedMemory; use flush_flush::{FFHandle, FFPrimitives, FlushAndFlush}; use nix::Error; -use prefetcher_reverse::{Prober, CACHE_LINE_LEN, PAGE_CACHELINE_LEN}; +use prefetcher_reverse::ip_tool::{Function, TIMED_MACCESS}; +use prefetcher_reverse::{ + pattern_helper, PatternAccess, Prober, CACHE_LINE_LEN, PAGE_CACHELINE_LEN, +}; use rand::seq::SliceRandom; use std::iter::Cycle; @@ -196,8 +199,10 @@ fn main() { } }*/ - let pattern = generate_pattern(0, 3, 12).unwrap(); - let pattern4 = generate_pattern(0, 4, 12).unwrap(); + let reload = Function::try_new(1, 0, TIMED_MACCESS).unwrap(); + + let pattern = pattern_helper(generate_pattern(0, 3, 12).unwrap(), &reload); + let pattern4 = pattern_helper(generate_pattern(0, 4, 12).unwrap(), &reload); let mut new_prober = Prober::<1>::new(63).unwrap(); let result = new_prober.full_page_probe(pattern.clone(), NUM_ITERATION as u32, 100); println!("{}", result); @@ -207,27 +212,27 @@ fn main() { println!("{}", result2); let result4 = new_prober.full_page_probe(pattern4, NUM_ITERATION as u32, 100); println!("{}", result4); - let pattern5 = generate_pattern(0, 5, 8).unwrap(); + let pattern5 = pattern_helper(generate_pattern(0, 5, 8).unwrap(), &reload); let result5 = new_prober.full_page_probe(pattern5, NUM_ITERATION as u32, 100); println!("{}", result5); - let pattern5 = generate_pattern(0, 5, 4).unwrap(); + let pattern5 = pattern_helper(generate_pattern(0, 5, 4).unwrap(), &reload); let result5 = new_prober.full_page_probe(pattern5, NUM_ITERATION as u32, 100); println!("{}", result5); - let pattern = generate_pattern(0, 10, 4).unwrap(); + let pattern = pattern_helper(generate_pattern(0, 10, 4).unwrap(), &reload); let result = new_prober.full_page_probe(pattern, NUM_ITERATION as u32, 100); println!("{}", result); - let pattern = generate_pattern(0, 6, 8).unwrap(); + let pattern = pattern_helper(generate_pattern(0, 6, 8).unwrap(), &reload); let result = new_prober.full_page_probe(pattern, NUM_ITERATION as u32, 100); println!("{}", result); - let pattern = generate_pattern(2, 6, 0).unwrap(); + let pattern = pattern_helper(generate_pattern(2, 6, 0).unwrap(), &reload); let result = new_prober.full_page_probe(pattern, NUM_ITERATION as u32, 100); println!("{}", result); - let pattern = vec![0, 0, 8, 8, 16, 16, 24, 24]; + let pattern = pattern_helper(vec![0, 0, 8, 8, 16, 16, 24, 24], &reload); let result = new_prober.full_page_probe(pattern, NUM_ITERATION as u32, 100); println!("{}", result);