From 047a24ca9f7467aba5e0e7c913d769834f78fefe Mon Sep 17 00:00:00 2001
From: augustin64 <me.git@augustin64.fr>
Date: Wed, 4 Jan 2023 10:50:55 +0100
Subject: [PATCH 1/5] Add convolution benchmark [CUDA]

---
 src/scripts/benchmark_mul.py                  |  37 +++-
 src/scripts/compilation.sh                    |  22 ++
 src/scripts/convolution_benchmark.cu          | 201 ++++++++++++++++++
 .../matrix_multiplication_benchmark.cu        |   6 +-
 4 files changed, 258 insertions(+), 8 deletions(-)
 create mode 100755 src/scripts/compilation.sh
 create mode 100644 src/scripts/convolution_benchmark.cu

diff --git a/src/scripts/benchmark_mul.py b/src/scripts/benchmark_mul.py
index 73f506e..dc21a70 100644
--- a/src/scripts/benchmark_mul.py
+++ b/src/scripts/benchmark_mul.py
@@ -17,8 +17,8 @@ def avg(vals):
         "depth": vals[0]["depth"]
     }
 
-def mul_matrix(n, p, q):
-    output = subprocess.check_output(["./a.out", str(n), str(p), str(q)])
+def mul_matrix(n, p, q, executable="./a.out"):
+    output = subprocess.check_output([executable, str(n), str(p), str(q)])
     result = [float(i.split(":")[-1]) for i in output.decode("utf8").split("\n") if i != ""]
     return {
         "GPUtime": result[0],
@@ -29,14 +29,39 @@ def mul_matrix(n, p, q):
         "depth": p
     }
 
-def generate_data():
+def conv_matrix(n, p, q, r, executable="./a.out"):
+    """Run the convolution benchmark binary and return its parsed statistics."""
+    argv = [executable] + [str(arg) for arg in (n, p, q, r)]
+    stdout_lines = subprocess.check_output(argv).decode("utf8").split("\n")
+    # Every non-empty output line is expected to end in ":<number>"; keep the number.
+    parsed = [float(line.split(":")[-1]) for line in stdout_lines if line != ""]
+    # Values are read positionally, in this order, from the program's output.
+    labels = ("GPUtime", "CPUtime", "errMax", "errMoy")
+    stats = {label: parsed[pos] for pos, label in enumerate(labels)}
+    stats.update({"width": q, "depth": p})
+    return stats
+
+def generate_data_mul():  # 60 sizes (100..6000, depth 40), 10 runs each reduced with avg(); dumped to result_mul.json
     values = []
     depth = 40
     for i in range(60):
         values.append(avg([mul_matrix((i+1)*100, depth, (i+1)*100) for j in range(10)]))
         print(f"Added M({(i+1)*100}x{depth}) x M({depth}x{(i+1)*100})")
 
-    with open("result.json", "w") as file:
+    with open("result_mul.json", "w") as file:
+        json.dump(values, file, indent=4)
+
+
+def generate_data_conv():
+    """Benchmark convolutions for input sizes 100..1000 and dump the averages to result_conv.json."""
+    values = []
+    output_dim = rows = columns = 40
+    for i in range(10):
+        input_dim = (i+1)*100
+        # Each size is measured 10 times and the runs are reduced with avg().
+        values.append(avg([conv_matrix(input_dim, output_dim, rows, columns) for _ in range(10)]))
+        print(f"Added ({input_dim}, {output_dim}, {rows}, {columns})")
+    with open("result_conv.json", "w") as file:
         json.dump(values, file, indent=4)
 
 
@@ -58,7 +83,7 @@ def plot_erreur(data):
     plt.plot(x, CPUtime)
     plt.show()
 
-def load_data():
-    with open("result.json", 'r') as f:
+def load_data(filename="result.json"):  # NOTE(review): generate_data_mul/_conv write result_mul.json / result_conv.json, not this default
+    with open(filename, 'r') as f:
         data = json.load(f)
     return data
\ No newline at end of file
diff --git a/src/scripts/compilation.sh b/src/scripts/compilation.sh
new file mode 100755
index 0000000..a8a0cba
--- /dev/null
+++ b/src/scripts/compilation.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+BUILDDIR="../../build"  # relative to the directory holding the .cu benchmarks (the script's expected cwd)
+WD=$PWD  # remembered so the script can return here after running make
+
+cd $BUILDDIR/..
+make all
+cd $WD
+
+echo "Compiling matrix_multiplication_benchmark.cu"
+nvcc -ljpeg \
+    matrix_multiplication_benchmark.cu \
+    "$BUILDDIR/"cnn_cuda_matrix_multiplication.o \
+    "$BUILDDIR/"cuda_utils.o \
+    -o benchmark-matrix-multiplication
+
+echo "Compiling matrix_multiplication_benchmark.cu"
+nvcc -ljpeg \
+    convolution_benchmark.cu \
+    "$BUILDDIR/"cnn_cuda_matrix_multiplication.o \
+    "$BUILDDIR/"cuda_utils.o \
+    -o benchmark-convolution
\ No newline at end of file
diff --git a/src/scripts/convolution_benchmark.cu b/src/scripts/convolution_benchmark.cu
new file mode 100644
index 0000000..9ccb94a
--- /dev/null
+++ b/src/scripts/convolution_benchmark.cu
@@ -0,0 +1,201 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <math.h>
+#include <time.h>
+
+#include "../src/cnn/include/convolution.h"
+#include "../src/cnn/include/struct.h"
+#include "../src/include/colors.h"
+#include "../src/include/utils.h"
+
+
+float random_float(float low, float high) {
+    float t = (float)rand() / (float)RAND_MAX; // t is in [0, 1]
+    return (1.0f - t) * low + t * high; // linear interpolation between low and high
+}
+
+
+// Overwrite every cell of the n x p x q matrix with a random value in [0, max_val].
+void fill_matrix_random(float ***matrix, int n, int p, int q, float max_val) {
+    for (int layer=0; layer < n; layer++) {
+        for (int row=0; row < p; row++) {
+            float* cells = matrix[layer][row];
+            for (int col=0; col < q; col++) cells[col] = random_float(0.0f, max_val);
+        }
+    }
+}
+
+
+// Print an n x p matrix to stdout, one bracketed, tab-separated row per line.
+void print_matrix(float** mat, int n, int p) {
+    for (int row=0; row < n; row++) {
+        const float* values = mat[row];
+        printf("[\t");
+        for (int col=0; col < p; col++) printf("%0.1f\t", values[col]);
+        printf("]\n");
+    }
+}
+
+
+// Allocate an n x p x q matrix (released with free_matrix) filled with random
+// values in [0, max_val]. Allocation results are not checked.
+float*** create_matrix(int n, int p, int q, float max_val) {
+    float*** matrix = (float***)malloc(n*sizeof(float**));
+    for (int layer=0; layer < n; layer++) {
+        float** rows = (float**)malloc(sizeof(float*)*p);
+        for (int row=0; row < p; row++) rows[row] = (float*)malloc(sizeof(float)*q);
+        matrix[layer] = rows;
+    }
+    fill_matrix_random(matrix, n, p, q, max_val);
+    return matrix;
+}
+
+
+// Allocate an n x p x q matrix with every cell set to zero.
+// Allocation results are not checked.
+float*** create_empty_matrix(int n, int p, int q) {
+    float*** matrix = (float***)malloc(n*sizeof(float**));
+    for (int layer=0; layer < n; layer++) {
+        float** rows = matrix[layer] = (float**)malloc(sizeof(float*)*p);
+        for (int row=0; row < p; row++) {
+            float* cells = rows[row] = (float*)malloc(sizeof(float)*q);
+            for (int col=0; col < q; col++) cells[col] = 0.;
+        }
+    }
+    return matrix;
+}
+
+// Release a matrix of n layers with p rows each: rows first, then layers, then the top array.
+void free_matrix(float*** matrix, int n, int p) {
+    for (int layer=0; layer < n; layer++) {
+        float** rows = matrix[layer];
+        for (int row=0; row < p; row++) free(rows[row]);
+        free(rows);
+    }
+    free(matrix);
+}
+
+float max_float(float a, float b) { // larger of a and b
+    return (b < a) ? a : b; // b is returned whenever a > b is false
+}
+
+// Print the largest ("err_max:") and mean ("err_moy:") relative error between
+// m1 and m2 (both n x p x q), each cell contributing 2*|a-b| / |a+b|.
+// Always returns true; `acceptation` is accepted but no tolerance is enforced.
+bool check_matrices_equality(float*** m1, float*** m2, int n, int p, int q, int acceptation) {
+    float err_max = 0.;
+    float err_moy = 0.;
+    for (int i=0; i < n; i++) {
+        for (int j=0; j < p; j++) {
+            for (int k=0; k < q; k++) {
+                float diff = fabs(m1[i][j][k] - m2[i][j][k]);
+                // Equal cells count as zero error: avoids 0/0 = NaN when both are exactly 0.
+                float err_percent = (diff == 0.) ? 0. : 2*diff/fabs(m1[i][j][k] + m2[i][j][k]);
+                err_max = max_float(err_max, err_percent);
+                err_moy += err_percent;
+            }
+        }
+    }
+    printf("err_max:%lf\n", err_max);
+    printf("err_moy:%lf\n", err_moy/(n*p*q));
+    return true;
+}
+
+// Time one convolution on GPU then CPU with random data; prints "GPU:", "CPU:", "err_max:", "err_moy:" lines.
+void run_convolution_test(int input_dim, int output_dim, int rows, int columns) {
+    assert(input_dim >= output_dim);
+    int k_size = input_dim - output_dim +1;
+
+    // Random data generation
+    Kernel_cnn* kernel = (Kernel_cnn*)malloc(sizeof(Kernel_cnn));
+    kernel->k_size = k_size;
+    kernel->rows = rows;
+    kernel->columns = columns;
+
+    // bias[kernel->columns][dim_output][dim_output]
+    kernel->bias = create_matrix(kernel->columns, output_dim, output_dim, 15.0f);
+    kernel->d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f);
+
+    // w[rows][columns][k_size][k_size]
+    kernel->w = (float****)malloc(sizeof(float***)*kernel->rows);
+    kernel->d_w = (float****)malloc(sizeof(float***)*kernel->rows);
+    for (int i=0; i < kernel->rows; i++) {
+        kernel->w[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 15.0f);
+        kernel->d_w[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
+    }
+
+    float*** input = create_matrix(kernel->rows, input_dim, input_dim, 5.0f);
+    float*** output_cpu = create_empty_matrix(kernel->columns, output_dim, output_dim);
+    float*** output_gpu = create_empty_matrix(kernel->columns, output_dim, output_dim);
+
+    //printf("(%d, %d, %d, %d) Data generation complete\n", rows, columns, input_dim, output_dim);
+
+
+    // Run the computations
+    clock_t start, end;
+    double cpu_time_used, gpu_time_used;
+
+    start = clock();
+    make_convolution_device(kernel, input, output_gpu, output_dim);
+    end = clock();
+
+    gpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
+    printf("GPU: %lf\n", gpu_time_used);
+
+
+    start = clock();
+    make_convolution_cpu(kernel, input, output_cpu, output_dim);
+    end = clock();
+
+    cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
+    printf("CPU: %lf\n", rows, columns, input_dim, output_dim, cpu_time_used);    
+
+    // Vérification de l'égalité des matrices
+    //printf("(%d, %d, %d, %d) Checking equality.\n", rows, columns, input_dim, output_dim);
+    // A mismatch does not abort the run; only the printed statistics are used. TODO: change acceptation
+    check_matrices_equality(output_gpu, output_cpu, kernel->columns, output_dim, output_dim, kernel->k_size);
+
+    // Release everything allocated above, including the kernel struct itself.
+    free_matrix(kernel->bias, kernel->columns, output_dim);
+    free_matrix(kernel->d_bias, kernel->columns, output_dim);
+
+    for (int i=0; i < kernel->rows; i++) {
+        free_matrix(kernel->w[i], kernel->columns, kernel->k_size);
+        free_matrix(kernel->d_w[i], kernel->columns, kernel->k_size);
+    }
+    free(kernel->w);
+    free(kernel->d_w);
+
+    free_matrix(input, kernel->rows, input_dim);
+    free_matrix(output_cpu, kernel->columns, output_dim);
+    free_matrix(output_gpu, kernel->columns, output_dim);
+    free(kernel);
+}
+
+
+// Usage: <program> input_dim output_dim rows columns (exit status 1 when arguments are missing).
+int main(int argc, char* argv[]) {
+    if (argc < 5) {
+        fprintf(stderr, "Usage: %s input_dim output_dim rows columns\n", argc > 0 ? argv[0] : "convolution_benchmark"); // stdout is kept for result lines
+        return 1;
+    }
+    int n = strtol(argv[1], NULL, 10);
+    int p = strtol(argv[2], NULL, 10);
+    int q = strtol(argv[3], NULL, 10);
+    int r = strtol(argv[4], NULL, 10);
+
+    /*
+    printf("Checking CUDA compatibility.\n");
+    bool cuda_compatible = check_cuda_compatibility();
+    if (!cuda_compatible) {
+        printf(RED "CUDA not compatible, skipping tests.\n" RESET);
+        return 0;
+    }
+    */
+    srand(time(NULL));
+    run_convolution_test(n, p, q, r);
+
+    return 0;
+}
\ No newline at end of file
diff --git a/src/scripts/matrix_multiplication_benchmark.cu b/src/scripts/matrix_multiplication_benchmark.cu
index 9bcc41b..b9955b0 100644
--- a/src/scripts/matrix_multiplication_benchmark.cu
+++ b/src/scripts/matrix_multiplication_benchmark.cu
@@ -63,14 +63,16 @@ float max_float(float a, float b) {
 bool check_matrices_equality(float** m1, float** m2, int n, int p) {
     float err_max = 0.;
     float err_moy = 0.;
+    float err_percent = 0.;
     for (int i=0; i < n; i++) {
         for (int j=0; j < p; j++) {
             if (fabs(m1[i][j] - m2[i][j]) > 0.8) {
                 //printf("%d %d\n", i, j);
                 //return false;
             }
-            err_max = max_float(err_max, fabs(m1[i][j] - m2[i][j]));
-            err_moy += fabs(m1[i][j] - m2[i][j]);
+            err_percent = 2*fabs(m1[i][j] - m2[i][j])/fabs(m1[i][j] + m2[i][j]);
+            err_max = max_float(err_max, err_percent);
+            err_moy += err_percent;
         }
     }
     printf("err_max:%lf\n", err_max);

From d0d68c7afd7f8a47da6470b4c23c3ad0cd2c5f7e Mon Sep 17 00:00:00 2001
From: augustin64 <me.git@augustin64.fr>
Date: Wed, 4 Jan 2023 10:52:39 +0100
Subject: [PATCH 2/5] Update cuda benchmark

---
 src/scripts/convolution_benchmark.cu           | 8 ++++----
 src/scripts/matrix_multiplication_benchmark.cu | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/scripts/convolution_benchmark.cu b/src/scripts/convolution_benchmark.cu
index 9ccb94a..4bc6053 100644
--- a/src/scripts/convolution_benchmark.cu
+++ b/src/scripts/convolution_benchmark.cu
@@ -5,10 +5,10 @@
 #include <math.h>
 #include <time.h>
 
-#include "../src/cnn/include/convolution.h"
-#include "../src/cnn/include/struct.h"
-#include "../src/include/colors.h"
-#include "../src/include/utils.h"
+#include "../cnn/include/convolution.h"
+#include "../cnn/include/struct.h"
+#include "../include/colors.h"
+#include "../include/utils.h"
 
 
 float random_float(float low, float high) {
diff --git a/src/scripts/matrix_multiplication_benchmark.cu b/src/scripts/matrix_multiplication_benchmark.cu
index b9955b0..213adb4 100644
--- a/src/scripts/matrix_multiplication_benchmark.cu
+++ b/src/scripts/matrix_multiplication_benchmark.cu
@@ -4,7 +4,7 @@
 #include <math.h>
 #include <time.h>
 
-#include "../cnn/matrix_multiplication.cu"
+#include "../cnn/include/matrix_multiplication.h"
 
 
 float random_float(float low, float high) {

From 9dfcba355196a03787f402aecc9c2fa8fa4e5201 Mon Sep 17 00:00:00 2001
From: augustin64 <me.git@augustin64.fr>
Date: Wed, 4 Jan 2023 11:00:51 +0100
Subject: [PATCH 3/5] Update benchmark tests

---
 src/scripts/compilation.sh           | 5 +++--
 src/scripts/convolution_benchmark.cu | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/scripts/compilation.sh b/src/scripts/compilation.sh
index a8a0cba..5f1ece9 100755
--- a/src/scripts/compilation.sh
+++ b/src/scripts/compilation.sh
@@ -5,6 +5,7 @@ WD=$PWD
 
 cd $BUILDDIR/..
 make all
+make build/cnn_cuda_matrix_multiplication.o
 cd $WD
 
 echo "Compiling matrix_multiplication_benchmark.cu"
@@ -14,9 +15,9 @@ nvcc -ljpeg \
     "$BUILDDIR/"cuda_utils.o \
     -o benchmark-matrix-multiplication
 
-echo "Compiling matrix_multiplication_benchmark.cu"
+echo "Compiling convolution_benchmark.cu"
 nvcc -ljpeg \
     convolution_benchmark.cu \
-    "$BUILDDIR/"cnn_cuda_matrix_multiplication.o \
+    "$BUILDDIR/"cnn_cuda_convolution.o \
     "$BUILDDIR/"cuda_utils.o \
     -o benchmark-convolution
\ No newline at end of file
diff --git a/src/scripts/convolution_benchmark.cu b/src/scripts/convolution_benchmark.cu
index 4bc6053..9dbefe6 100644
--- a/src/scripts/convolution_benchmark.cu
+++ b/src/scripts/convolution_benchmark.cu
@@ -150,7 +150,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
     end = clock();
 
     cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
-    printf("CPU: %lf\n", rows, columns, input_dim, output_dim, cpu_time_used);    
+    printf("CPU: %lf\n", cpu_time_used);    
 
     // Vérification de l'égalité des matrices
     //printf("(%d, %d, %d, %d) Checking equality.\n", rows, columns, input_dim, output_dim);

From 76547dc3825f4306bbfedc10944aa7b0cf128c31 Mon Sep 17 00:00:00 2001
From: augustin64 <me.git@augustin64.fr>
Date: Wed, 4 Jan 2023 11:35:49 +0100
Subject: [PATCH 4/5] Update presentation.md

---
 doc/presentation.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/doc/presentation.md b/doc/presentation.md
index d65d9b1..bb2a8e8 100644
--- a/doc/presentation.md
+++ b/doc/presentation.md
@@ -1,14 +1,20 @@
+<!--
+Reveal.initialize({
+    slideNumber: true
+});
+-->
+
 # Présentation du TIPE
 
 Julien Chemillier  
 Augustin Lucas  
-Élèves en MP2I
+Élèves en MPI*
 
 ---
 
 ## Objectif - Lien avec le sujet
 
-![](https://augustin64.fr/tipe/geoguessr.png)
+![](https://augustin64.fr/tipe/geoguessr.jpg)
 
 Note:
 Est-ce que vous connaissez Geoguessr ?  

From 5fa5a79b0c89a80427923b4ad58fab885499f734 Mon Sep 17 00:00:00 2001
From: augustin64 <me.git@augustin64.fr>
Date: Fri, 6 Jan 2023 18:27:56 +0100
Subject: [PATCH 5/5] Add cnn structure test

---
 test/cnn_structure.c | 45 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 test/cnn_structure.c

diff --git a/test/cnn_structure.c b/test/cnn_structure.c
new file mode 100644
index 0000000..eb47d05
--- /dev/null
+++ b/test/cnn_structure.c
@@ -0,0 +1,45 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include "../src/include/colors.h"
+#include "../src/cnn/include/creation.h"
+#include "../src/cnn/include/utils.h"
+#include "../src/cnn/include/free.h"
+#include "../src/include/colors.h"
+
+
+// Build a LeNet5 network, print a per-layer summary, then free it.
+int main() {
+    printf("Création du réseau\n");
+    Network* network = create_network_lenet5(0, 0, 3, 2, 32, 1);
+    printf(GREEN "OK\n" RESET);
+
+    printf("Architecture LeNet5:\n");
+    // NOTE(review): assumes network->kernel holds network->size entries — confirm.
+    for (int layer=0; layer < network->size; layer++) {
+        Kernel* current = network->kernel[layer];
+        if (current->cnn) {
+            printf("\n==== Couche %d de type "BLUE"CNN"RESET" ====\n", layer);
+            printf("k_size: %d\n", current->cnn->k_size);
+            printf("rows: %d\n", current->cnn->rows);
+            printf("columns: %d\n", current->cnn->columns);
+        } else if (current->nn) {
+            printf("\n==== Couche %d de type "GREEN"NN"RESET" ====\n", layer);
+            printf("input: %d\n", current->nn->input_units);
+            printf("output: %d\n", current->nn->output_units);
+        } else { // neither cnn nor nn is set: reported as a pooling layer
+            printf("\n==== Couche %d de type "YELLOW"Pooling"RESET" ====\n", layer);
+            printf("Linéarisation: %d\n", current->linearisation);
+        }
+        printf("width: %d\n", network->width[layer]);
+        printf("depth: %d\n", network->depth[layer]);
+    }
+    printf(GREEN "\nOK\n" RESET);
+
+    printf("Libération de la mémoire\n");
+    free_network(network);
+    printf(GREEN "OK\n" RESET);
+    return 0;
+}
\ No newline at end of file