From 047a24ca9f7467aba5e0e7c913d769834f78fefe Mon Sep 17 00:00:00 2001 From: augustin64 Date: Wed, 4 Jan 2023 10:50:55 +0100 Subject: [PATCH 1/5] Add convolution benchmark [CUDA] --- src/scripts/benchmark_mul.py | 37 +++- src/scripts/compilation.sh | 22 ++ src/scripts/convolution_benchmark.cu | 201 ++++++++++++++++++ .../matrix_multiplication_benchmark.cu | 6 +- 4 files changed, 258 insertions(+), 8 deletions(-) create mode 100755 src/scripts/compilation.sh create mode 100644 src/scripts/convolution_benchmark.cu diff --git a/src/scripts/benchmark_mul.py b/src/scripts/benchmark_mul.py index 73f506e..dc21a70 100644 --- a/src/scripts/benchmark_mul.py +++ b/src/scripts/benchmark_mul.py @@ -17,8 +17,8 @@ def avg(vals): "depth": vals[0]["depth"] } -def mul_matrix(n, p, q): - output = subprocess.check_output(["./a.out", str(n), str(p), str(q)]) +def mul_matrix(n, p, q, executable="./a.out"): + output = subprocess.check_output([executable, str(n), str(p), str(q)]) result = [float(i.split(":")[-1]) for i in output.decode("utf8").split("\n") if i != ""] return { "GPUtime": result[0], @@ -29,14 +29,39 @@ def mul_matrix(n, p, q): "depth": p } -def generate_data(): +def conv_matrix(n, p, q, r, executable="./a.out"): + output = subprocess.check_output([executable, str(n), str(p), str(q), str(r)]) + result = [float(i.split(":")[-1]) for i in output.decode("utf8").split("\n") if i != ""] + return { + "GPUtime": result[0], + "CPUtime": result[1], + "errMax": result[2], + "errMoy": result[3], + "width": q, + "depth": p + } + +def generate_data_mul(): values = [] depth = 40 for i in range(60): values.append(avg([mul_matrix((i+1)*100, depth, (i+1)*100) for j in range(10)])) print(f"Added M({(i+1)*100}x{depth}) x M({depth}x{(i+1)*100})") - with open("result.json", "w") as file: + with open("result_mul.json", "w") as file: + json.dump(values, file, indent=4) + + +def generate_data_conv(): + values = [] + output_dim = 40 + rows = 40 + columns = 40 + for i in range(10): + values.append(avg([conv_matrix((i+1)*100, output_dim, rows, columns) for j in range(10)])) + print(f"Added ({(i+1)*100}, output_dim, rows, columns)") + + with open("result_conv.json", "w") as file: json.dump(values, file, indent=4) @@ -58,7 +83,7 @@ def plot_erreur(data): plt.plot(x, CPUtime) plt.show() -def load_data(): - with open("result.json", 'r') as f: +def load_data(filename="result.json"): + with open(filename, 'r') as f: data = json.load(f) return data \ No newline at end of file diff --git a/src/scripts/compilation.sh b/src/scripts/compilation.sh new file mode 100755 index 0000000..a8a0cba --- /dev/null +++ b/src/scripts/compilation.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +BUILDDIR="../../build" +WD=$PWD + +cd $BUILDDIR/.. +make all +cd $WD + +echo "Compiling matrix_multiplication_benchmark.cu" +nvcc -ljpeg \ + matrix_multiplication_benchmark.cu \ + "$BUILDDIR/"cnn_cuda_matrix_multiplication.o \ + "$BUILDDIR/"cuda_utils.o \ + -o benchmark-matrix-multiplication + +echo "Compiling matrix_multiplication_benchmark.cu" +nvcc -ljpeg \ + convolution_benchmark.cu \ + "$BUILDDIR/"cnn_cuda_matrix_multiplication.o \ + "$BUILDDIR/"cuda_utils.o \ + -o benchmark-convolution \ No newline at end of file diff --git a/src/scripts/convolution_benchmark.cu b/src/scripts/convolution_benchmark.cu new file mode 100644 index 0000000..9ccb94a --- /dev/null +++ b/src/scripts/convolution_benchmark.cu @@ -0,0 +1,201 @@ +#include +#include +#include +#include +#include +#include + +#include "../src/cnn/include/convolution.h" +#include "../src/cnn/include/struct.h" +#include "../src/include/colors.h" +#include "../src/include/utils.h" + + +float random_float(float low, float high) { + float t = (float)rand() / (float)RAND_MAX; + return (1.0f - t) * low + t * high; +} + + +void fill_matrix_random(float ***matrix, int n, int p, int q, float max_val) { + for (int i=0; i < n; i++) { + for (int j=0; j < p; j++) { + for (int k=0; k < q; k++) { + matrix[i][j][k] = random_float(0.0f, max_val); + } + } + } +} + + +void print_matrix(float** mat, int n, int p) { + for (int i=0; i < n; i++) { + printf("[\t"); + for (int j=0; j < p; j++) { + printf("%0.1f\t", mat[i][j]); + } + printf("]\n"); + } +} + + +float*** create_matrix(int n, int p, int q, float max_val) { + float*** matrix = (float***)malloc(n*sizeof(float**)); + for (int i=0; i < n; i++) { + matrix[i] = (float**)malloc(sizeof(float*)*p); + for (int j=0; j < p; j++) { + matrix[i][j] = (float*)malloc(sizeof(float)*q); + } + } + + fill_matrix_random(matrix, n, p, q, max_val); + return matrix; +} + + +float*** create_empty_matrix(int n, int p, int q) { + float*** matrix = (float***)malloc(n*sizeof(float**)); + for (int i=0; i < n; i++) { + matrix[i] = (float**)malloc(sizeof(float*)*p); + for (int j=0; j < p; j++) { + matrix[i][j] = (float*)malloc(sizeof(float)*q); + for (int k=0; k < q; k++) { + matrix[i][j][k] = 0.; + } + } + } + return matrix; +} + +void free_matrix(float*** matrix, int n, int p) { + for (int i=0; i < n; i++) { + for (int j=0; j < p; j++) { + free(matrix[i][j]); + } + free(matrix[i]); + } + free(matrix); +} + +float max_float(float a, float b) { + return a > b ? a : b; +} + +bool check_matrices_equality(float*** m1, float*** m2, int n, int p, int q, int acceptation) { + float err_max = 0.; + float err_moy = 0.; + float err_percent = 0.; + for (int i=0; i < n; i++) { + for (int j=0; j < p; j++) { + for (int k=0; k < q; k++) { + if (fabs(m1[i][j][k] - m2[i][j][k]) > 0.01*acceptation) { + //printf(RED "diff %d %d %d: %f val: %f et %f\n" RESET, i, j, k, fabs(m1[i][j][k] - m2[i][j][k]), m1[i][j][k], m2[i][j][k]); + //return false; + } + err_percent = 2*fabs(m1[i][j][k] - m2[i][j][k])/fabs(m1[i][j][k] + m2[i][j][k]); + err_max = max_float(err_max, err_percent); + err_moy += err_percent; + } + } + } + printf("err_max:%lf\n", err_max); + printf("err_moy:%lf\n", err_moy/(n*p*q)); + return true; +} + +void run_convolution_test(int input_dim, int output_dim, int rows, int columns) { + assert(input_dim >= output_dim); + int k_size = input_dim - output_dim +1; + + // Génération des données aléatoires + Kernel_cnn* kernel = (Kernel_cnn*)malloc(sizeof(Kernel_cnn)); + + kernel->k_size = k_size; + kernel->rows = rows; + kernel->columns = columns; + + // bias[kernel->columns][dim_output][dim_output] + kernel->bias = create_matrix(kernel->columns, output_dim, output_dim, 15.0f); + kernel->d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f); + + // w[rows][columns][k_size][k_size] + kernel->w = (float****)malloc(sizeof(float***)*kernel->rows); + kernel->d_w = (float****)malloc(sizeof(float***)*kernel->rows); + for (int i=0; i < kernel->rows; i++) { + kernel->w[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 15.0f); + kernel->d_w[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f); + } + + float*** input = create_matrix(kernel->rows, input_dim, input_dim, 5.0f); + float*** output_cpu = create_empty_matrix(kernel->columns, output_dim, output_dim); + float*** output_gpu = create_empty_matrix(kernel->columns, output_dim, output_dim); + + //printf("(%d, %d, %d, %d) Data generation complete\n", rows, columns, input_dim, output_dim); + + + // Lancement des calculs + clock_t start, end; + double cpu_time_used, gpu_time_used; + + start = clock(); + make_convolution_device(kernel, input, output_gpu, output_dim); + end = clock(); + + gpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; + printf("GPU: %lf\n", gpu_time_used); + + + start = clock(); + make_convolution_cpu(kernel, input, output_cpu, output_dim); + end = clock(); + + cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; + printf("CPU: %lf\n", rows, columns, input_dim, output_dim, cpu_time_used); + + // Vérification de l'égalité des matrices + //printf("(%d, %d, %d, %d) Checking equality.\n", rows, columns, input_dim, output_dim); + if (!check_matrices_equality(output_gpu, output_cpu, kernel->columns, output_dim, output_dim, kernel->k_size)) {// TODO: change acceptation + //exit(1); + } + //printf(GREEN "OK\n" RESET); + + free_matrix(kernel->bias, kernel->columns, output_dim); + free_matrix(kernel->d_bias, kernel->columns, output_dim); + + for (int i=0; i < kernel->rows; i++) { + free_matrix(kernel->w[i], kernel->columns, kernel->k_size); + free_matrix(kernel->d_w[i], kernel->columns, kernel->k_size); + } + free(kernel->w); + free(kernel->d_w); + + free_matrix(input, kernel->rows, input_dim); + free_matrix(output_cpu, kernel->columns, output_dim); + free_matrix(output_gpu, kernel->columns, output_dim); +} + + +int main(int argc, char* argv[]) { + if (argc < 5) { + return 1; + } + int n = strtol(argv[1], NULL, 10); + int p = strtol(argv[2], NULL, 10); + int q = strtol(argv[3], NULL, 10); + int r = strtol(argv[4], NULL, 10); + + /* + printf("Checking CUDA compatibility.\n"); + bool cuda_compatible = check_cuda_compatibility(); + if (!cuda_compatible) { + printf(RED "CUDA not compatible, skipping tests.\n" RESET); + return 0; + } + */ + + srand(time(NULL)); + + run_convolution_test(n, p, q, r); + + return 0; +} \ No newline at end of file diff --git a/src/scripts/matrix_multiplication_benchmark.cu b/src/scripts/matrix_multiplication_benchmark.cu index 9bcc41b..b9955b0 100644 --- a/src/scripts/matrix_multiplication_benchmark.cu +++ b/src/scripts/matrix_multiplication_benchmark.cu @@ -63,14 +63,16 @@ float max_float(float a, float b) { bool check_matrices_equality(float** m1, float** m2, int n, int p) { float err_max = 0.; float err_moy = 0.; + float err_percent = 0.; for (int i=0; i < n; i++) { for (int j=0; j < p; j++) { if (fabs(m1[i][j] - m2[i][j]) > 0.8) { //printf("%d %d\n", i, j); //return false; } - err_max = max_float(err_max, fabs(m1[i][j] - m2[i][j])); - err_moy += fabs(m1[i][j] - m2[i][j]); + err_percent = 2*fabs(m1[i][j] - m2[i][j])/fabs(m1[i][j] + m2[i][j]); + err_max = max_float(err_max, err_percent); + err_moy += err_percent; } } printf("err_max:%lf\n", err_max); From d0d68c7afd7f8a47da6470b4c23c3ad0cd2c5f7e Mon Sep 17 00:00:00 2001 From: augustin64 Date: Wed, 4 Jan 2023 10:52:39 +0100 Subject: [PATCH 2/5] Update cuda benchmark --- src/scripts/convolution_benchmark.cu | 8 ++++---- src/scripts/matrix_multiplication_benchmark.cu | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/scripts/convolution_benchmark.cu b/src/scripts/convolution_benchmark.cu index 9ccb94a..4bc6053 100644 --- a/src/scripts/convolution_benchmark.cu +++ b/src/scripts/convolution_benchmark.cu @@ -5,10 +5,10 @@ #include #include -#include "../src/cnn/include/convolution.h" -#include "../src/cnn/include/struct.h" -#include "../src/include/colors.h" -#include "../src/include/utils.h" +#include "../cnn/include/convolution.h" +#include "../cnn/include/struct.h" +#include "../include/colors.h" +#include "../include/utils.h" float random_float(float low, float high) { diff --git a/src/scripts/matrix_multiplication_benchmark.cu b/src/scripts/matrix_multiplication_benchmark.cu index b9955b0..213adb4 100644 --- a/src/scripts/matrix_multiplication_benchmark.cu +++ b/src/scripts/matrix_multiplication_benchmark.cu @@ -4,7 +4,7 @@ #include #include -#include "../cnn/matrix_multiplication.cu" +#include "../cnn/include/matrix_multiplication.h" float random_float(float low, float high) { From 9dfcba355196a03787f402aecc9c2fa8fa4e5201 Mon Sep 17 00:00:00 2001 From: augustin64 Date: Wed, 4 Jan 2023 11:00:51 +0100 Subject: [PATCH 3/5] Update benchmark tests --- src/scripts/compilation.sh | 5 +++-- src/scripts/convolution_benchmark.cu | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/scripts/compilation.sh b/src/scripts/compilation.sh index a8a0cba..5f1ece9 100755 --- a/src/scripts/compilation.sh +++ b/src/scripts/compilation.sh @@ -5,6 +5,7 @@ WD=$PWD cd $BUILDDIR/.. make all +make build/cnn_cuda_matrix_multiplication.o cd $WD echo "Compiling matrix_multiplication_benchmark.cu" @@ -14,9 +15,9 @@ nvcc -ljpeg \ "$BUILDDIR/"cuda_utils.o \ -o benchmark-matrix-multiplication -echo "Compiling matrix_multiplication_benchmark.cu" +echo "Compiling convolution_benchmark.cu" nvcc -ljpeg \ convolution_benchmark.cu \ - "$BUILDDIR/"cnn_cuda_matrix_multiplication.o \ + "$BUILDDIR/"cnn_cuda_convolution.o \ "$BUILDDIR/"cuda_utils.o \ -o benchmark-convolution \ No newline at end of file diff --git a/src/scripts/convolution_benchmark.cu b/src/scripts/convolution_benchmark.cu index 4bc6053..9dbefe6 100644 --- a/src/scripts/convolution_benchmark.cu +++ b/src/scripts/convolution_benchmark.cu @@ -150,7 +150,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns) end = clock(); cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; - printf("CPU: %lf\n", rows, columns, input_dim, output_dim, cpu_time_used); + printf("CPU: %lf\n", cpu_time_used); // Vérification de l'égalité des matrices //printf("(%d, %d, %d, %d) Checking equality.\n", rows, columns, input_dim, output_dim); From 76547dc3825f4306bbfedc10944aa7b0cf128c31 Mon Sep 17 00:00:00 2001 From: augustin64 Date: Wed, 4 Jan 2023 11:35:49 +0100 Subject: [PATCH 4/5] Update presentation.md --- doc/presentation.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/doc/presentation.md b/doc/presentation.md index d65d9b1..bb2a8e8 100644 --- a/doc/presentation.md +++ b/doc/presentation.md @@ -1,14 +1,20 @@ + + # Présentation du TIPE Julien Chemillier Augustin Lucas -Élèves en MP2I +Élèves en MPI* --- ## Objectif - Lien avec le sujet -![](https://augustin64.fr/tipe/geoguessr.png) +![](https://augustin64.fr/tipe/geoguessr.jpg) Note: Est-ce que vous connaissez Geoguessr ? From 5fa5a79b0c89a80427923b4ad58fab885499f734 Mon Sep 17 00:00:00 2001 From: augustin64 Date: Fri, 6 Jan 2023 18:27:56 +0100 Subject: [PATCH 5/5] Add cnn structure test --- test/cnn_structure.c | 45 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 test/cnn_structure.c diff --git a/test/cnn_structure.c b/test/cnn_structure.c new file mode 100644 index 0000000..eb47d05 --- /dev/null +++ b/test/cnn_structure.c @@ -0,0 +1,45 @@ +#include +#include +#include +#include + +#include "../src/include/colors.h" +#include "../src/cnn/include/creation.h" +#include "../src/cnn/include/utils.h" +#include "../src/cnn/include/free.h" +#include "../src/include/colors.h" + + +int main() { + Kernel* kernel; + printf("Création du réseau\n"); + Network* network = create_network_lenet5(0, 0, 3, 2, 32, 1); + printf(GREEN "OK\n" RESET); + + printf("Architecture LeNet5:\n"); + for (int i=0; i < network->size; i++) { + kernel = network->kernel[i]; + if ((!kernel->cnn)&&(!kernel->nn)) { + printf("\n==== Couche %d de type "YELLOW"Pooling"RESET" ====\n", i); + printf("Linéarisation: %d\n", kernel->linearisation); + } else if (!kernel->cnn) { + printf("\n==== Couche %d de type "GREEN"NN"RESET" ====\n", i); + printf("input: %d\n", kernel->nn->input_units); + printf("output: %d\n", kernel->nn->output_units); + } else { + printf("\n==== Couche %d de type "BLUE"CNN"RESET" ====\n", i); + printf("k_size: %d\n", kernel->cnn->k_size); + printf("rows: %d\n", kernel->cnn->rows); + printf("columns: %d\n", kernel->cnn->columns); + } + printf("width: %d\n", network->width[i]); + printf("depth: %d\n", network->depth[i]); + } + printf(GREEN "\nOK\n" RESET); + + printf("Libération de la mémoire\n"); + free_network(network); + printf(GREEN "OK\n" RESET); + + return 0; +} \ No newline at end of file