From 26b2d2294263e6db6539662dd2a5cab9a8d77489 Mon Sep 17 00:00:00 2001
From: GuillaumeDIDIER
Date: Fri, 1 May 2020 10:24:15 +0200
Subject: [PATCH] Update result analysis scripts and code to the version for
 results-2020-04-20

---
 .idea/DendrobatesTinctoriusAzureus.iml        |  5 +-
 .../results-2020-04-07-5c025fb/analyse_csv.py | 35 ++++++++-
 cache_utils/results-2020-04-17/analyse.sh     |  6 ++
 cache_utils/results-2020-04-17/analyse_csv.py | 63 ++++++++++++++++
 .../results-2020-04-17/analyse_iterations.awk | 16 ++++
 cache_utils/results-2020-04-20/analyse.sh     |  6 ++
 cache_utils/results-2020-04-20/analyse_csv.py | 52 +++++++++++++
 .../results-2020-04-20/analyse_iterations.awk | 16 ++++
 cache_utils/src/calibration.rs                | 75 +++++++++++++------
 cache_utils/src/lib.rs                        |  1 +
 cache_utils/src/main.rs                       | 27 ++++++-
 requirements.txt                              |  3 +
 12 files changed, 279 insertions(+), 26 deletions(-)
 create mode 100755 cache_utils/results-2020-04-17/analyse.sh
 create mode 100644 cache_utils/results-2020-04-17/analyse_csv.py
 create mode 100644 cache_utils/results-2020-04-17/analyse_iterations.awk
 create mode 100755 cache_utils/results-2020-04-20/analyse.sh
 create mode 100644 cache_utils/results-2020-04-20/analyse_csv.py
 create mode 100644 cache_utils/results-2020-04-20/analyse_iterations.awk
 create mode 100644 requirements.txt

diff --git a/.idea/DendrobatesTinctoriusAzureus.iml b/.idea/DendrobatesTinctoriusAzureus.iml
index da99253..9f85fe3 100644
--- a/.idea/DendrobatesTinctoriusAzureus.iml
+++ b/.idea/DendrobatesTinctoriusAzureus.iml
@@ -2,7 +2,7 @@
-
+
@@ -35,10 +35,11 @@
+
-
+
\ No newline at end of file
diff --git a/cache_utils/results-2020-04-07-5c025fb/analyse_csv.py b/cache_utils/results-2020-04-07-5c025fb/analyse_csv.py
index ea6415f..f629126 100644
--- a/cache_utils/results-2020-04-07-5c025fb/analyse_csv.py
+++ b/cache_utils/results-2020-04-07-5c025fb/analyse_csv.py
@@ -1,4 +1,6 @@
-import pandas
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
 
 columns = ["Addr", "Hash"]
 core_number = 8 # FIXME
@@ -7,8 +9,37 @@ for i in range(0, core_number):
         for op in ["Hit", "Miss"]:
             columns.append(op + str(i) + stat)
 columns.append("Hmm")
-df = pandas.read_csv("citron-vert/combined.csv", header=0, names=columns)
+df = pd.read_csv("citron-vert/combined.csv", header=0, names=columns)
 
 selected_columns = columns[:-1]
 df = df[selected_columns]
 print(df.head())
+median_columns = list(filter(lambda s: s.endswith("Med"), columns))
+
+median_hits_col = list(filter(lambda s: s.startswith("Hit"), median_columns))
+median_miss_col = list(filter(lambda s: s.startswith("Miss"), median_columns))
+
+print(list(median_columns))
+print(list(median_hits_col), list(median_miss_col))
+
+hashes = df["Hash"].drop_duplicates()
+print(hashes)
+
+#def distrib(x, y, **kwargs):
+#    sns.distplot()
+
+separate_core_df = df.melt(id_vars=["Addr", "Hash"], value_vars=median_hits_col)
+
+g = sns.FacetGrid(separate_core_df, row="variable")
+g.map(sns.distplot, "value")
+plt.figure()
+
+separate_core_df = df.melt(id_vars=["Addr", "Hash"], value_vars=median_miss_col)
+g = sns.FacetGrid(separate_core_df, row="variable")
+g.map(sns.distplot, "value", hist_kws={"range":(75,115)})
+
+plt.show()
+
+#sns.distplot(df["values"], hist_kws={"weights": df["count"]})
+
+
diff --git a/cache_utils/results-2020-04-17/analyse.sh b/cache_utils/results-2020-04-17/analyse.sh
new file mode 100755
index 0000000..f780115
--- /dev/null
+++ b/cache_utils/results-2020-04-17/analyse.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+awk '/^Iteration [[:digit:]]*[.]*/' < log.txt > iterations.txt
+rm -f results.csv
+awk -f `dirname $0`/analyse_iterations.awk < iterations.txt # This uses system to split off awk scripts doing the analysis
+grep -v -e "0,0$" results.csv > results_lite.csv
+#paste -d"," *.csv > combined.csv
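The analyse.sh script above, together with the analyse_iterations.awk script below, turns the calibration log into a CSV: the "Iteration N..." marker lines are paired up, the "RESULT:"-prefixed lines logged between each pair are extracted, and the pair index is prepended as a "core" column; grep -v "0,0$" then drops the all-zero buckets into results_lite.csv. For reference, a rough Python equivalent of the extraction step (a sketch only; extract_results is a hypothetical helper, not part of this patch):

    # Pairs consecutive "Iteration ..." marker lines, pulls the "RESULT:" CSV
    # rows logged between each pair, and prefixes the pair index as a "core"
    # column, mirroring what analyse_iterations.awk writes into results.csv.
    import re

    def extract_results(log_path="log.txt", out_path="results.csv"):
        lines = open(log_path).read().splitlines()
        markers = [i for i, l in enumerate(lines)
                   if re.match(r"^Iteration \d+", l)]
        with open(out_path, "w") as out:
            for core, (start, end) in enumerate(zip(markers[0::2],
                                                    markers[1::2])):
                rows = [l[len("RESULT:"):] for l in lines[start:end + 1]
                        if l.startswith("RESULT:")]
                if core == 0 and rows:
                    out.write("core," + rows[0] + "\n")  # global header, once
                for row in rows[1:]:  # rows[0] is the per-core header, skip it
                    out.write("{},{}\n".format(core, row))

    extract_results()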
diff --git a/cache_utils/results-2020-04-17/analyse_csv.py b/cache_utils/results-2020-04-17/analyse_csv.py
new file mode 100644
index 0000000..83d6d30
--- /dev/null
+++ b/cache_utils/results-2020-04-17/analyse_csv.py
@@ -0,0 +1,63 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sys import exit
+
+columns = ["Addr", "Hash"]
+core_number = 8 # FIXME
+for i in range(0, core_number):
+    for stat in ["Min", "Med", "Max"]:
+        for op in ["Hit", "Miss"]:
+            columns.append(op + str(i) + stat)
+columns.append("Hmm")
+df = pd.read_csv("./results_lite.csv")
+print(df.head())
+
+
+g = sns.FacetGrid(df, col="core", row="hash", legend_out=True)
+
+
+def custom_hist(x,y, **kwargs):
+    sns.distplot(x, range(100,150), hist_kws={"weights": y, "histtype":"step"}, kde=False, **kwargs)
+
+g.map(custom_hist, "time", "clflush_hit")
+# g.map(sns.distplot, "time", hist_kws={"weights": df["clflush_hit"]}, kde=False)
+plt.show()
+
+# test = pd.DataFrame({"value" : [0, 5], "weight": [5, 1]})
+# plt.figure()
+# sns.distplot(test["value"], hist_kws={"weights": test["weight"]}, kde=False)
+
+exit(0)
+
+selected_columns = columns[:-1]
+df = df[selected_columns]
+print(df.head())
+
+median_columns = list(filter(lambda s: s.endswith("Med"), columns))
+
+median_hits_col = list(filter(lambda s: s.startswith("Hit"), median_columns))
+median_miss_col = list(filter(lambda s: s.startswith("Miss"), median_columns))
+
+print(list(median_columns))
+print(list(median_hits_col), list(median_miss_col))
+
+hashes = df["Hash"].drop_duplicates()
+print(hashes)
+
+# def distrib(x, y, **kwargs):
+#     sns.distplot()
+
+separate_core_df = df.melt(id_vars=["Addr", "Hash"], value_vars=median_hits_col)
+
+g = sns.FacetGrid(separate_core_df, row="variable")
+g.map(sns.distplot, "value")
+plt.figure()
+
+separate_core_df = df.melt(id_vars=["Addr", "Hash"], value_vars=median_miss_col)
+g = sns.FacetGrid(separate_core_df, row="variable")
+g.map(sns.distplot, "value", hist_kws={"range": (75, 115)})
+
+plt.show()
+
+# sns.distplot(df["values"], hist_kws={"weights": df["count"]})
diff --git a/cache_utils/results-2020-04-17/analyse_iterations.awk b/cache_utils/results-2020-04-17/analyse_iterations.awk
new file mode 100644
index 0000000..496ca5d
--- /dev/null
+++ b/cache_utils/results-2020-04-17/analyse_iterations.awk
@@ -0,0 +1,16 @@
+BEGIN {
+    i = 0
+}
+{
+    start = $0
+    getline
+    end = $0
+    if (i == 0) {
+        # generate header
+        system("awk '$0 == \""start"\",$0 == \""end"\"' < log.txt | grep \"RESULT:\" | head -n 1 | cut -b 8- | awk '{print \"core,\" $0}'> results.csv")
+    }
+    cut = "cut -b 8- | tail -n +2"
+
+    system("awk '$0 == \""start"\",$0 == \""end"\"' < log.txt | grep \"RESULT:\" | " cut " | awk '{print \""i",\" $0}'>> results.csv")
+    i = i + 1
+}
diff --git a/cache_utils/results-2020-04-20/analyse.sh b/cache_utils/results-2020-04-20/analyse.sh
new file mode 100755
index 0000000..f780115
--- /dev/null
+++ b/cache_utils/results-2020-04-20/analyse.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+awk '/^Iteration [[:digit:]]*[.]*/' < log.txt > iterations.txt
+rm -f results.csv
+awk -f `dirname $0`/analyse_iterations.awk < iterations.txt # This uses system to split off awk scripts doing the analysis
+grep -v -e "0,0$" results.csv > results_lite.csv
+#paste -d"," *.csv > combined.csv
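Each results_lite.csv row is a pre-binned histogram bucket, a "time" value plus per-operation counts, so the analyse_csv.py scripts (2020-04-17 above, 2020-04-20 below) pass the counts to seaborn's distplot as histogram weights instead of replaying raw samples. The same trick in plain matplotlib, collapsed over cores and hashes purely to show the mechanism (bin range borrowed from the 2020-04-17 script):

    # Plot pre-binned (time, count) data by passing counts as histogram
    # weights; column names are those of results_lite.csv.
    import matplotlib.pyplot as plt
    import pandas as pd

    df = pd.read_csv("./results_lite.csv")
    plt.hist(df["time"], bins=range(100, 150), weights=df["clflush_hit"],
             histtype="step", label="clflush hit")
    plt.hist(df["time"], bins=range(100, 150), weights=df["clflush_miss"],
             histtype="step", color="r", label="clflush miss")
    plt.xlabel("cycles")
    plt.ylabel("count")
    plt.legend()
    plt.show()

The scripts themselves avoid this collapsing by keeping each (core, hash) pair in its own FacetGrid facet.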
diff --git a/cache_utils/results-2020-04-20/analyse_csv.py b/cache_utils/results-2020-04-20/analyse_csv.py
new file mode 100644
index 0000000..e9c4dfc
--- /dev/null
+++ b/cache_utils/results-2020-04-20/analyse_csv.py
@@ -0,0 +1,52 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sys import exit
+import wquantiles as wq
+
+df = pd.read_csv("./results_lite.csv")
+print(df.head())
+
+
+g = sns.FacetGrid(df, col="core", row="hash", legend_out=True)
+
+
+def custom_hist(x, y1, y2, **kwargs):
+    sns.distplot(x, range(200, 280), hist_kws={"weights": y1, "histtype":"step"}, kde=False, **kwargs)
+    kwargs["color"] = "r"
+    sns.distplot(x, range(200, 280), hist_kws={"weights": y2, "histtype":"step"}, kde=False, **kwargs)
+
+g.map(custom_hist, "time", "clflush_hit", "clflush_miss")
+# g.map(sns.distplot, "time", hist_kws={"weights": df["clflush_hit"]}, kde=False)
+
+plt.figure()
+
+def stat(x, key):
+    return wq.median(x["time"], x[key])
+
+
+miss = df.groupby(["core", "hash"]).apply(stat, "clflush_miss")
+stats = miss.reset_index()
+stats.columns = ["Core", "Hash", "Miss"]
+hit = df.groupby(["core", "hash"]).apply(stat, "clflush_hit")
+stats["Hit"] = hit.values
+
+
+print(stats.to_string())
+
+g = sns.FacetGrid(stats, row="Core")
+
+g.map(sns.distplot, 'Miss', bins=range(200, 280), color="r")
+g.map(sns.distplot, 'Hit', bins=range(200, 280))
+plt.show()
+
+#stats["clflush_miss_med"] = stats[[0]].apply(lambda x: x["miss_med"])
+#stats["clflush_hit_med"] = stats[[0]].apply(lambda x: x["hit_med"])
+#del df[[0]]
+#print(hit.to_string(), miss.to_string())
+
+# test = pd.DataFrame({"value" : [0, 5], "weight": [5, 1]})
+# plt.figure()
+# sns.distplot(test["value"], hist_kws={"weights": test["weight"]}, kde=False)
+
+exit(0)
diff --git a/cache_utils/results-2020-04-20/analyse_iterations.awk b/cache_utils/results-2020-04-20/analyse_iterations.awk
new file mode 100644
index 0000000..496ca5d
--- /dev/null
+++ b/cache_utils/results-2020-04-20/analyse_iterations.awk
@@ -0,0 +1,16 @@
+BEGIN {
+    i = 0
+}
+{
+    start = $0
+    getline
+    end = $0
+    if (i == 0) {
+        # generate header
+        system("awk '$0 == \""start"\",$0 == \""end"\"' < log.txt | grep \"RESULT:\" | head -n 1 | cut -b 8- | awk '{print \"core,\" $0}'> results.csv")
+    }
+    cut = "cut -b 8- | tail -n +2"
+
+    system("awk '$0 == \""start"\",$0 == \""end"\"' < log.txt | grep \"RESULT:\" | " cut " | awk '{print \""i",\" $0}'>> results.csv")
+    i = i + 1
+}
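Because the rows are buckets, a plain median over "time" would be meaningless; the 2020-04-20 analyse_csv.py above therefore computes weighted medians with wquantiles, grouped by (core, hash). A minimal self-contained sketch of that computation, on made-up counts:

    # Weighted median over histogram buckets, as wq.median is used in the
    # 2020-04-20 script. The counts below are hypothetical, for illustration.
    import pandas as pd
    import wquantiles as wq

    df = pd.DataFrame({
        "time":         [220, 225, 230, 235, 240],  # bucket values (cycles)
        "clflush_hit":  [0, 5, 10, 80, 5],          # hypothetical counts
        "clflush_miss": [10, 80, 10, 0, 0],
    })
    hit_med = wq.median(df["time"], df["clflush_hit"])
    miss_med = wq.median(df["time"], df["clflush_miss"])
    print(hit_med, miss_med)  # each lands near its weight peak (~235, ~225)

In the script this runs once per (core, hash) group via groupby().apply, producing the stats table that the final FacetGrid plots.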
diff --git a/cache_utils/src/calibration.rs b/cache_utils/src/calibration.rs
index c3590eb..c0e1c93 100644
--- a/cache_utils/src/calibration.rs
+++ b/cache_utils/src/calibration.rs
@@ -170,8 +170,16 @@
         cache_line_size,
         array.len() as isize,
         &[
-            (load_and_flush, "clflush hit"),
-            (flush_and_flush, "clflush miss"),
+            CalibrateOperation {
+                op: load_and_flush,
+                name: "clflush_hit",
+                display_name: "clflush hit",
+            },
+            CalibrateOperation {
+                op: flush_and_flush,
+                name: "clflush_miss",
+                display_name: "clflush miss",
+            },
         ],
         CFLUSH_BUCKET_NUMBER,
         CFLUSH_BUCKET_SIZE,
@@ -189,11 +197,17 @@ pub struct CalibrateResult {
     max: Vec<u64>,
 }
 
+pub struct CalibrateOperation<'a> {
+    pub op: unsafe fn(*const u8) -> u64,
+    pub name: &'a str,
+    pub display_name: &'a str,
+}
+
 pub unsafe fn calibrate(
     p: *const u8,
     increment: usize,
     len: isize,
-    operations: &[(unsafe fn(*const u8) -> u64, &str)],
+    operations: &[CalibrateOperation],
     buckets_num: usize,
     bucket_size: usize,
     num_iterations: u32,
@@ -216,7 +230,7 @@ fn calibrate_impl(
     p: *const u8,
     increment: usize,
     len: isize,
-    operations: &[(unsafe fn(*const u8) -> u64, &str)],
+    operations: &[CalibrateOperation],
     buckets_num: usize,
     bucket_size: usize,
     num_iterations: u32,
@@ -235,7 +249,10 @@ fn calibrate_impl(
     if verbosity_level >= Thresholds {
         println!(
             "Calibrating {}...",
-            operations.iter().map(|(_, name)| { name }).format(", ")
+            operations
+                .iter()
+                .map(|operation| { operation.display_name })
+                .format(", ")
         );
     }
 
@@ -245,11 +262,30 @@
     if verbosity_level >= Thresholds {
         println!(
             "CSV: address, hash, {} min, {} median, {} max",
-            operations.iter().map(|(_, name)| name).format(" min, "),
-            operations.iter().map(|(_, name)| name).format(" median, "),
-            operations.iter().map(|(_, name)| name).format(" max, ")
+            operations
+                .iter()
+                .map(|operation| operation.name)
+                .format(" min, "),
+            operations
+                .iter()
+                .map(|operation| operation.name)
+                .format(" median, "),
+            operations
+                .iter()
+                .map(|operation| operation.name)
+                .format(" max, ")
         );
     }
+    if verbosity_level >= RawResult {
+        println!(
+            "RESULT:address,hash,time,{}",
+            operations
+                .iter()
+                .map(|operation| operation.name)
+                .format(",")
+        );
+    }
+
     for i in (0..len).step_by(increment) {
         let pointer = unsafe { p.offset(i) };
         let hash = hasher.hash(pointer as usize);
@@ -271,7 +307,7 @@ fn calibrate_impl(
         for op in operations {
             let mut hist = vec![0; buckets_num];
             for _ in 0..num_iterations {
-                let time = unsafe { op.0(pointer) };
+                let time = unsafe { (op.op)(pointer) };
                 let bucket = min(buckets_num - 1, to_bucket(time));
                 hist[bucket] += 1;
             }
@@ -286,16 +322,9 @@
             .map(|h| (num_iterations - h[buckets_num - 1]) / 2)
             .collect();
 
-        if verbosity_level >= RawResult {
-            println!(
-                "time {}",
-                operations.iter().map(|(_, name)| name).format(" ")
-            );
-        }
-
         for j in 0..buckets_num - 1 {
             if verbosity_level >= RawResult {
-                print!("{:3}:", from_bucket(j));
+                print!("RESULT:{:p},{:x},{}", pointer, hash, from_bucket(j));
             }
             // ignore the last bucket : spurious context switches etc.
             for op in 0..operations.len() {
@@ -305,7 +334,7 @@
                 let med = &mut calibrate_result.median[op];
                 let sum = &mut sums[op];
                 if verbosity_level >= RawResult {
-                    print!("{:10}", hist);
+                    print!(",{}", hist);
                 }
 
                 if *min == 0 {
@@ -329,10 +358,10 @@
         }
     }
     if verbosity_level >= Thresholds {
-        for (j, (_, op)) in operations.iter().enumerate() {
+        for (j, op) in operations.iter().enumerate() {
             println!(
                 "{}: min {}, median {}, max {}",
-                op,
+                op.display_name,
                 calibrate_result.min[j],
                 calibrate_result.median[j],
                 calibrate_result.max[j]
@@ -367,7 +396,11 @@ pub fn calibrate_L3_miss_hit(
         pointer,
         cache_line_size,
         array.len() as isize,
-        &[(l3_and_reload, "L3 hit")],
+        &[CalibrateOperation {
+            op: l3_and_reload,
+            name: "l3_hit",
+            display_name: "L3 hit",
+        }],
         512,
         2,
         1 << 11,
diff --git a/cache_utils/src/lib.rs b/cache_utils/src/lib.rs
index 4532fea..a48a6e4 100644
--- a/cache_utils/src/lib.rs
+++ b/cache_utils/src/lib.rs
@@ -1,5 +1,6 @@
 #![cfg_attr(feature = "no_std", no_std)]
 #![feature(ptr_internals)]
+#![allow(clippy::missing_safety_doc)]
 
 use static_assertions::assert_cfg;
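With the CalibrateOperation change above, RawResult output becomes machine-readable: one "RESULT:address,hash,time,<operation names>" header plus one "RESULT:" row per histogram bucket, which is exactly what the awk pipeline greps for. A sketch of reading those lines straight into pandas, bypassing the shell pipeline (it assumes only the format visible in the println! calls above):

    # Collect RESULT: lines from the log; the first is the column header,
    # repeated headers (one per calibrate_impl run) are dropped.
    import io
    import pandas as pd

    def read_results(log_path="log.txt"):
        with open(log_path) as f:
            csv_lines = [l[len("RESULT:"):] for l in f
                         if l.startswith("RESULT:")]
        header, body = csv_lines[0], csv_lines[1:]
        body = [l for l in body if not l.startswith("address")]
        return pd.read_csv(io.StringIO("".join([header] + body)))

    print(read_results().head())

Unlike analyse_iterations.awk, this flat read does not recover the per-core split; see the sketch after the 2020-04-17 analyse.sh above for that.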
diff --git a/cache_utils/src/main.rs b/cache_utils/src/main.rs
index 6781bea..c935e5e 100644
--- a/cache_utils/src/main.rs
+++ b/cache_utils/src/main.rs
@@ -41,6 +41,31 @@ pub fn main() {
     // Let's grab all the list of CPUS
     // Then iterate the calibration on each CPU core.
 
+    eprint!("Warming up...");
+    for i in 0..(CpuSet::count() - 1) {
+        if old.is_set(i).unwrap() {
+            //println!("Iteration {}...", i);
+            let mut core = CpuSet::new();
+            core.set(i).unwrap();
+
+            match sched_setaffinity(Pid::from_raw(0), &core) {
+                Ok(()) => {
+                    calibrate_flush(array, 64, Verbosity::NoOutput);
+                    sched_setaffinity(Pid::from_raw(0), &old).unwrap();
+                    //println!("Iteration {}...ok ", i);
+                    eprint!(" {}", i);
+                }
+                Err(Sys(Errno::EINVAL)) => {
+                    //println!("skipping");
+                    continue;
+                }
+                Err(e) => {
+                    panic!("Unexpected error while setting affinity: {}", e);
+                }
+            }
+        }
+    }
+    eprintln!();
     for i in 0..(CpuSet::count() - 1) {
         if old.is_set(i).unwrap() {
             println!("Iteration {}...", i);
@@ -50,7 +75,7 @@ pub fn main() {
             match sched_setaffinity(Pid::from_raw(0), &core) {
                 Ok(()) => {
                     calibrate_flush(array, 64, Verbosity::NoOutput);
-                    calibrate_flush(array, 64, Verbosity::Thresholds);
+                    calibrate_flush(array, 64, Verbosity::RawResult);
                     sched_setaffinity(Pid::from_raw(0), &old).unwrap();
                     println!("Iteration {}...ok ", i);
                     eprintln!("Iteration {}...ok ", i);
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..1ac0819
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+pandas==1.0.3
+seaborn==0.10.0
+wquantiles==0.5
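Taken together, the pieces above suggest the following workflow: run the calibration binary with stdout captured to log.txt (progress messages go to stderr, so the terminal stays readable), run analyse.sh in the same directory to distill results_lite.csv, then run the matching analyse_csv.py. A sketch of that orchestration; the binary path is an assumption, since the patch does not show how the binary is launched:

    # End-to-end pipeline sketch, run from the repository root. Assumes the
    # cache_utils binary has been built and is invocable as "./cache_utils"
    # (adapt to your build setup) and that the 2020-04-20 scripts are used.
    import subprocess

    with open("log.txt", "w") as log:
        subprocess.run(["./cache_utils"], stdout=log, check=True)
    subprocess.run(["sh", "cache_utils/results-2020-04-20/analyse.sh"],
                   check=True)
    subprocess.run(["python", "cache_utils/results-2020-04-20/analyse_csv.py"],
                   check=True)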