From 677de892e7403571385ebf737727e6c6641051f3 Mon Sep 17 00:00:00 2001
From: augustin64
Date: Sun, 26 Mar 2023 17:14:07 +0200
Subject: [PATCH] Add function.cu

Translate `apply_function_input` and `get_activation_function` to CUDA
---
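Usage note (illustrative sketch; `m` below is a hypothetical float*** buffer of
size 3x3x3): call sites no longer fetch the activation function pointer
themselves, they pass the activation identifier and apply_function_input()
selects the CPU or CUDA path at compile time.

    // Before: resolve the pointer, then apply it
    //   apply_function_input(get_activation_function(RELU), m, 3, 3, 3);
    // After: pass the identifier directly
    apply_function_input(RELU, m, 3, 3, 3);

On the CUDA path the kernel is launched with 16x8x8-thread blocks and a grid
sized by i_div_up (assumed to be ceiling division); e.g. a 10x10x10 input, as
in test/cnn_function.cu, gives a 1x2x2 grid.
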
 Makefile                          |   5 +-
 src/cnn/backpropagation.c         |   6 +-
 src/cnn/cnn.c                     |   4 +-
 src/cnn/function.c                | 283 +++++++++++++++++++----
 src/cnn/function.cu               | 361 ++++++++++++++++++++++++++++++
 src/cnn/include/backpropagation.h |   6 +-
 src/cnn/include/function.h        |  88 +++++++-
 test/cnn_function.cu              |  60 +++++
 8 files changed, 751 insertions(+), 62 deletions(-)
 create mode 100644 src/cnn/function.cu
 create mode 100644 test/cnn_function.cu

diff --git a/Makefile b/Makefile
index 0c40477..5b4a3b2 100644
--- a/Makefile
+++ b/Makefile
@@ -95,7 +95,7 @@ $(BUILDDIR)/cnn-main-cuda: $(BUILDDIR)/cnn_main.cuda.o \
 		$(BUILDDIR)/cnn_initialisation.cuda.o \
 		$(BUILDDIR)/cnn_cuda_make.o \
 		$(BUILDDIR)/cnn_neuron_io.cuda.o \
-		$(BUILDDIR)/cnn_function.cuda.o \
+		$(BUILDDIR)/cnn_cuda_function.o \
 		$(BUILDDIR)/cnn_utils.cuda.o \
 		$(BUILDDIR)/cnn_update.cuda.o \
 		$(BUILDDIR)/cnn_free.cuda.o \
@@ -223,4 +223,5 @@ $(CACHE_DIR)/mnist-reseau-cnn.bin: $(BUILDDIR)/cnn-main
 #
 clean:
 	rm -rf $(BUILDDIR)/*
-	rm -f $(CACHE_DIR)/*
\ No newline at end of file
+
+#rm -f $(CACHE_DIR)/*
\ No newline at end of file
diff --git a/src/cnn/backpropagation.c b/src/cnn/backpropagation.c
index 3b48556..758842c 100644
--- a/src/cnn/backpropagation.c
+++ b/src/cnn/backpropagation.c
@@ -82,7 +82,7 @@ void backward_max_pooling(float*** input, float*** output, int input_width, int
     }
 }
 
-void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, int size_input, int size_output, ptr d_function, int is_first) {
+void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, int size_input, int size_output, funcPtr d_function, int is_first) {
     // Bias
     for (int j=0; j < size_output; j++) {
         ker->d_bias[j] += output[j];
@@ -109,7 +109,7 @@ void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output,
     }
 }
 
-void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, ptr d_function) {
+void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, funcPtr d_function) {
     // Bias
     for (int j=0; j < size_output; j++) {
         ker->d_bias[j] += output[j];
@@ -144,7 +144,7 @@ void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, fl
     }
 }
 
-void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, ptr d_function, int is_first) {
+void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, funcPtr d_function, int is_first) {
     // Bias
     for (int i=0; i < depth_output; i++) {
         for (int j=0; j < dim_output; j++) {
diff --git a/src/cnn/cnn.c b/src/cnn/cnn.c
index abc8357..fc4894b 100644
--- a/src/cnn/cnn.c
+++ b/src/cnn/cnn.c
@@ -251,10 +251,10 @@ void backward_propagation(Network* network, int wanted_number) {
 
         if (k_i->cnn) { // Convolution
-            ptr d_f = get_activation_function(-activation);
+            funcPtr d_f = get_activation_function(-activation);
             backward_convolution(k_i->cnn, input, input_z, output, input_depth, input_width, output_depth, output_width, d_f, i==0);
         } else if (k_i->nn) { // Full connection
-            ptr d_f = get_activation_function(-activation);
+            funcPtr d_f = get_activation_function(-activation);
             if (k_i->linearisation == DOESNT_LINEARISE) { // Vecteur -> Vecteur
                 backward_dense(k_i->nn, input[0][0], input_z[0][0], output[0][0], input_width, output_width, d_f, i==0);
             } else { // Matrice -> vecteur
diff --git a/src/cnn/function.c b/src/cnn/function.c
index 0143731..e362e42 100644
--- a/src/cnn/function.c
+++ b/src/cnn/function.c
@@ -3,9 +3,26 @@
 #include <math.h>
 
 #include "../include/colors.h"
+#include "../include/utils.h"
 
 #include "include/function.h"
 
+#define BLOCKSIZE_x 16
+#define BLOCKSIZE_y 8
+#define BLOCKSIZE_z 8
+
+//* Identity
+#ifdef __CUDACC__
+__device__ float device_identity(float x) {
+    return x;
+}
+
+__device__ float device_identity_derivative(float x) {
+    (void)x;
+    return 1;
+}
+#endif
+
 float identity(float x) {
     return x;
 }
 
 float identity_derivative(float x) {
     (void)x;
     return 1;
 }
 
 
+//* Sigmoid
+#ifdef __CUDACC__
+__device__ float device_sigmoid(float x) {
+    return 1/(1 + exp(-x));
+}
+
+__device__ float device_sigmoid_derivative(float x) {
+    float tmp = exp(-x);
+    return tmp/((1+tmp)*(1+tmp));
+}
+#endif
+
 float sigmoid(float x) {
     return 1/(1 + exp(-x));
 }
 
 float sigmoid_derivative(float x) {
     float tmp = exp(-x);
     return tmp/((1+tmp)*(1+tmp));
 }
 
 
+//* RELU
+#ifdef __CUDACC__
+__device__ float device_relu(float x) {
+    return fmaxf(0, fminf(x, RELU_CLIP_VALUE));
+}
+
+__device__ float device_relu_derivative(float x) {
+    if (x > 0)
+        return 1;
+    return 0;
+}
+#endif
+
 float relu(float x) {
     return fmaxf(0, fminf(x, RELU_CLIP_VALUE));
 }
 
 float relu_derivative(float x) {
     if (x > 0)
         return 1;
     return 0;
 }
 
 
+//* Leaky RELU
+#ifdef __CUDACC__
+__device__ float device_leaky_relu(float x) {
+    if (x>0)
+        return fminf(x, RELU_CLIP_VALUE);
+    return x*LEAKER;
+}
+
+__device__ float device_leaky_relu_derivative(float x) {
+    if (x > 0)
+        return 1;
+    return LEAKER;
+}
+#endif
+
 float leaky_relu(float x) {
     if (x>0)
         return fminf(x, RELU_CLIP_VALUE);
     return x*LEAKER;
 }
 
 float leaky_relu_derivative(float x) {
     if (x > 0)
         return 1;
     return LEAKER;
 }
 
 
+//* Tanh
+#ifdef __CUDACC__
+__device__
+#endif
+float device_tanh_(float x) {
+    return tanh(x);
+}
+
+#ifdef __CUDACC__
+__device__
+#endif
+float device_tanh_derivative(float x) {
+    float a = tanh(x);
+    return 1 - a*a;
+}
+
 float tanh_(float x) {
     return tanh(x);
 }
 
 float tanh_derivative(float x) {
     float a = tanh(x);
     return 1 - a*a;
 }
 
 
+
+
+#ifdef __CUDACC__
+/*
+ * Définition des pointeurs de fonctions pour CUDA
+ * voir https://stackoverflow.com/a/15646771
+*/
+__device__ funcPtr ptr_sigmoid = device_sigmoid;
+__device__ funcPtr ptr_relu = device_relu;
+__device__ funcPtr ptr_leaky_relu = device_leaky_relu;
+__device__ funcPtr ptr_tanh = device_tanh_;
+__device__ funcPtr ptr_identity = device_identity;
+
+__device__ funcPtr ptr_identity_derivative = device_identity_derivative;
+__device__ funcPtr ptr_sigmoid_derivative = device_sigmoid_derivative;
+__device__ funcPtr ptr_relu_derivative = device_relu_derivative;
+__device__ funcPtr ptr_leaky_relu_derivative = device_leaky_relu_derivative;
+__device__ funcPtr ptr_tanh_derivative = device_tanh_derivative;
+#endif
+
+
+
 void apply_softmax_input(float ***input, int depth, int rows, int columns) {
     float m = -FLT_MAX;
     float sum=0;
     for (int i=0; i < depth; i++) {
         for (int j=0; j < rows; j++) {
             for (int k=0; k < columns; k++) {
                 m = fmaxf(m, input[i][j][k]);
             }
         }
     }
     for (int i=0; i < depth; i++) {
         for (int j=0; j < rows; j++) {
             for (int k=0; k < columns; k++) {
                 input[i][j][k] = exp(m-input[i][j][k]);
                 sum += input[i][j][k];
             }
         }
     }
     for (int i=0; i < depth; i++) {
         for (int j=0; j < rows; j++) {
             for (int k=0; k < columns; k++) {
                 input[i][j][k] = input[i][j][k]/sum;
             }
         }
     }
 }
 
 
-void apply_function_input(float (*f)(float), float*** input, int depth, int rows, int columns) {
+/*
+* Apply function on input
+*/
+#ifdef __CUDACC__
+__global__ void apply_function_input_kernel(funcPtr f, float*** input, int depth, int rows, int columns) {
+    // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
+    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth
+    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < rows
+    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < columns
+
+    if (idx >= depth || idy >= rows || idz >= columns) {
+        return;
+    }
+
+    input[idx][idy][idz] = (*f)(input[idx][idy][idz]);
+}
+
+
+void apply_function_input_device(int activation, float*** input, int depth, int rows, int columns) {
+    // Make computation
+    dim3 gridSize(i_div_up(depth, BLOCKSIZE_x), i_div_up(rows, BLOCKSIZE_y), i_div_up(columns, BLOCKSIZE_z));
+    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
+
+    funcPtr activation_function = get_activation_function_cuda(activation);
+
+    apply_function_input_kernel<<<gridSize, blockSize>>>(activation_function, input, depth, rows, columns);
+
+    gpuErrchk( cudaPeekAtLastError() );
+    gpuErrchk( cudaDeviceSynchronize() );
+}
+#endif
+
+void apply_function_input_cpu(int activation, float*** input, int depth, int rows, int columns) {
+    funcPtr f = get_activation_function(activation);
+
     for (int i=0; i < depth; i++) {
         for (int j=0; j < rows; j++) {
             for (int k=0; k < columns; k++) {
                 input[i][j][k] = (*f)(input[i][j][k]);
             }
         }
     }
 }
 
+#ifdef __CUDACC__
+extern "C"
+#endif
+void apply_function_input(int activation, float*** input, int depth, int rows, int columns) {
+    #ifndef __CUDACC__
+    apply_function_input_cpu(activation, input, depth, rows, columns);
+    #else
+    apply_function_input_device(activation, input, depth, rows, columns);
+    #endif
+}
+
 void apply_function_to_matrix(int activation, float*** input, int depth, int dim) {
     if (activation == SOFTMAX) {
         return apply_softmax_input(input, depth, dim, dim);
     }
     if (activation >= 1) { // Exclude negative values (derivative)
-        ptr f = get_activation_function(activation);
-        return apply_function_input(f, input, depth, dim, dim);
+        return apply_function_input(activation, input, depth, dim, dim);
     }
-    printf_error("fonction d'activation inconnue (apply_function_to_matrix): ");
+    printf_error((char*)"fonction d'activation inconnue (apply_function_to_matrix): ");
     printf("%d\n", activation);
 }
 
 
 void apply_function_to_vector(int activation, float*** input, int dim) {
     if (activation == SOFTMAX) {
         return apply_softmax_input(input, 1, 1, dim);
     }
     if (activation >= 1) { // Exclude negative values (derivative)
-        ptr f = get_activation_function(activation);
-        return apply_function_input(f, input, 1, 1, dim);
+        return apply_function_input(activation, input, 1, 1, dim);
     }
-    printf_error("fonction d'activation inconnue (apply_function_to_vector): ");
+    printf_error((char*)"fonction d'activation inconnue (apply_function_to_vector): ");
     printf("%d\n", activation);
 }
 
 
-ptr get_activation_function(int activation) {
-    if (activation == RELU) {
-        return &relu;
-    }
-    if (activation == -RELU) {
-        return &relu_derivative;
-    }
+funcPtr get_activation_function(int activation) {
+    switch (activation) {
+        case RELU:
+            return &relu;
+        case -RELU:
+            return &relu_derivative;
 
-    if (activation == IDENTITY) {
-        return &identity;
-    }
-    if (activation == -IDENTITY) {
-        return &identity_derivative;
-    }
+        case IDENTITY:
+            return &identity;
+        case -IDENTITY:
+            return &identity_derivative;
 
-    if (activation == SIGMOID) {
-        return &sigmoid;
-    }
-    if (activation == -SIGMOID) {
-        return &sigmoid_derivative;
-    }
+        case SIGMOID:
+            return &sigmoid;
+        case -SIGMOID:
+            return &sigmoid_derivative;
+
+        case LEAKY_RELU:
+            return &leaky_relu;
+        case -LEAKY_RELU:
+            return &leaky_relu_derivative;
 
-    if (activation == SOFTMAX) {
-        printf_error("impossible de renvoyer la fonction softmax\n");
-        return NULL;
-    }
-    if (activation == -SOFTMAX) {
-        printf_error("impossible de renvoyer la dérivée de la fonction softmax\n");
-        return NULL;
-    }
+        case TANH:
+            return &tanh_;
+        case -TANH:
+            return &tanh_derivative;
 
-    if (activation == TANH) {
-        return &tanh_;
-    }
-    if (activation == -TANH) {
-        return &tanh_derivative;
-    }
+        case SOFTMAX:
+            printf_error((char*)"impossible de renvoyer la fonction softmax\n");
+            return NULL;
+        case -SOFTMAX:
+            printf_error((char*)"impossible de renvoyer la dérivée de la fonction softmax\n");
+            return NULL;
 
-    if (activation == LEAKY_RELU) {
-        return &leaky_relu;
+        default:
+            printf_error((char*)"fonction d'activation inconnue (get_activation_function): ");
+            printf("%d\n", activation);
+            return NULL;
     }
-    if (activation == -LEAKY_RELU) {
-        return &leaky_relu_derivative;
-    }
-    printf_error("fonction d'activation inconnue (get_activation_function): ");
-    printf("%d\n", activation);
-    return NULL;
 }
+
+
+#ifdef __CUDACC__
+funcPtr get_activation_function_cuda(int activation) {
+    funcPtr host_function;
+
+    switch (activation) {
+        case RELU:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_relu, sizeof(funcPtr)));
+            break;
+        case -RELU:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_relu_derivative, sizeof(funcPtr)));
+            break;
+
+        case IDENTITY:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_identity, sizeof(funcPtr)));
+            break;
+        case -IDENTITY:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_identity_derivative, sizeof(funcPtr)));
+            break;
+
+        case SIGMOID:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_sigmoid, sizeof(funcPtr)));
+            break;
+        case -SIGMOID:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_sigmoid_derivative, sizeof(funcPtr)));
+            break;
+
+        case LEAKY_RELU:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_leaky_relu, sizeof(funcPtr)));
+            break;
+        case -LEAKY_RELU:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_leaky_relu_derivative, sizeof(funcPtr)));
+            break;
+
+        case TANH:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_tanh, sizeof(funcPtr)));
+            break;
+        case -TANH:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_tanh_derivative, sizeof(funcPtr)));
+            break;
+
+        case SOFTMAX:
+            printf_error((char*)"impossible de renvoyer la fonction softmax\n");
+            return NULL;
+        case -SOFTMAX:
+            printf_error((char*)"impossible de renvoyer la dérivée de la fonction softmax\n");
+            return NULL;
+
+        default:
+            printf_error((char*)"fonction d'activation inconnue (get_activation_function_cuda): ");
+            printf("%d\n", activation);
+            return NULL;
+    }
+    return host_function;
+}
+#endif
\ No newline at end of file
diff --git a/src/cnn/function.cu b/src/cnn/function.cu
new file mode 100644
index 0000000..e362e42
--- /dev/null
+++ b/src/cnn/function.cu
@@ -0,0 +1,361 @@
+#include <stdio.h>
+#include <float.h>
+#include <math.h>
+
+#include "../include/colors.h"
+#include "../include/utils.h"
+
+#include "include/function.h"
+
+#define BLOCKSIZE_x 16
+#define BLOCKSIZE_y 8
+#define BLOCKSIZE_z 8
+
+//* Identity
+#ifdef __CUDACC__
+__device__ float device_identity(float x) {
+    return x;
+}
+
+__device__ float device_identity_derivative(float x) {
+    (void)x;
+    return 1;
+}
+#endif
+
+float identity(float x) {
+    return x;
+}
+
+float identity_derivative(float x) {
+    (void)x;
+    return 1;
+}
+
+
+//* Sigmoid
+#ifdef __CUDACC__
+__device__ float device_sigmoid(float x) {
+    return 1/(1 + exp(-x));
+}
+
+__device__ float device_sigmoid_derivative(float x) {
+    float tmp = exp(-x);
+    return tmp/((1+tmp)*(1+tmp));
+}
+#endif
+
+float sigmoid(float x) {
+    return 1/(1 + exp(-x));
+}
+
+float sigmoid_derivative(float x) {
+    float tmp = exp(-x);
+    return tmp/((1+tmp)*(1+tmp));
+}
+
+
+//* RELU
+#ifdef __CUDACC__
+__device__ float device_relu(float x) {
+    return fmaxf(0, fminf(x, RELU_CLIP_VALUE));
+}
+
+__device__ float device_relu_derivative(float x) {
+    if (x > 0)
+        return 1;
+    return 0;
+}
+#endif
+
+float relu(float x) {
+    return fmaxf(0, fminf(x, RELU_CLIP_VALUE));
+}
+
+float relu_derivative(float x) {
+    if (x > 0)
+        return 1;
+    return 0;
+}
+
+
+//* Leaky RELU
+#ifdef __CUDACC__
+__device__ float device_leaky_relu(float x) {
+    if (x>0)
+        return fminf(x, RELU_CLIP_VALUE);
+    return x*LEAKER;
+}
+
+__device__ float device_leaky_relu_derivative(float x) {
+    if (x > 0)
+        return 1;
+    return LEAKER;
+}
+#endif
+
+float leaky_relu(float x) {
+    if (x>0)
+        return fminf(x, RELU_CLIP_VALUE);
+    return x*LEAKER;
+}
+
+float leaky_relu_derivative(float x) {
+    if (x > 0)
+        return 1;
+    return LEAKER;
+}
+
+
+//* Tanh
+#ifdef __CUDACC__
+__device__
+#endif
+float device_tanh_(float x) {
+    return tanh(x);
+}
+
+#ifdef __CUDACC__
+__device__
+#endif
+float device_tanh_derivative(float x) {
+    float a = tanh(x);
+    return 1 - a*a;
+}
+
+float tanh_(float x) {
+    return tanh(x);
+}
+
+float tanh_derivative(float x) {
+    float a = tanh(x);
+    return 1 - a*a;
+}
+
+
+
+
+#ifdef __CUDACC__
+/*
+ * Définition des pointeurs de fonctions pour CUDA
+ * voir https://stackoverflow.com/a/15646771
+*/
+__device__ funcPtr ptr_sigmoid = device_sigmoid;
+__device__ funcPtr ptr_relu = device_relu;
+__device__ funcPtr ptr_leaky_relu = device_leaky_relu;
+__device__ funcPtr ptr_tanh = device_tanh_;
+__device__ funcPtr ptr_identity = device_identity;
+
+__device__ funcPtr ptr_identity_derivative = device_identity_derivative;
+__device__ funcPtr ptr_sigmoid_derivative = device_sigmoid_derivative;
+__device__ funcPtr ptr_relu_derivative = device_relu_derivative;
+__device__ funcPtr ptr_leaky_relu_derivative = device_leaky_relu_derivative;
+__device__ funcPtr ptr_tanh_derivative = device_tanh_derivative;
+#endif
+
+
+
+void apply_softmax_input(float ***input, int depth, int rows, int columns) {
+    float m = -FLT_MAX;
+    float sum=0;
+    for (int i=0; i < depth; i++) {
+        for (int j=0; j < rows; j++) {
+            for (int k=0; k < columns; k++) {
+                m = fmaxf(m, input[i][j][k]);
+            }
+        }
+    }
+    for (int i=0; i < depth; i++) {
+        for (int j=0; j < rows; j++) {
+            for (int k=0; k < columns; k++) {
+                input[i][j][k] = exp(m-input[i][j][k]);
+                sum += input[i][j][k];
+            }
+        }
+    }
+    for (int i=0; i < depth; i++) {
+        for (int j=0; j < rows; j++) {
+            for (int k=0; k < columns; k++) {
+                input[i][j][k] = input[i][j][k]/sum;
+            }
+        }
+    }
+}
+
+
+/*
+* Apply function on input
+*/
+#ifdef __CUDACC__
+__global__ void apply_function_input_kernel(funcPtr f, float*** input, int depth, int rows, int columns) {
+    // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
+    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth
+    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < rows
+    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < columns
+
+    if (idx >= depth || idy >= rows || idz >= columns) {
+        return;
+    }
+
+    input[idx][idy][idz] = (*f)(input[idx][idy][idz]);
+}
+
+
+void apply_function_input_device(int activation, float*** input, int depth, int rows, int columns) {
+    // Make computation
+    dim3 gridSize(i_div_up(depth, BLOCKSIZE_x), i_div_up(rows, BLOCKSIZE_y), i_div_up(columns, BLOCKSIZE_z));
+    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
+
+    funcPtr activation_function = get_activation_function_cuda(activation);
+
+    apply_function_input_kernel<<<gridSize, blockSize>>>(activation_function, input, depth, rows, columns);
+
+    gpuErrchk( cudaPeekAtLastError() );
+    gpuErrchk( cudaDeviceSynchronize() );
+}
+#endif
+
+void apply_function_input_cpu(int activation, float*** input, int depth, int rows, int columns) {
+    funcPtr f = get_activation_function(activation);
+
+    for (int i=0; i < depth; i++) {
+        for (int j=0; j < rows; j++) {
+            for (int k=0; k < columns; k++) {
+                input[i][j][k] = (*f)(input[i][j][k]);
+            }
+        }
+    }
+}
+
+#ifdef __CUDACC__
+extern "C"
+#endif
+void apply_function_input(int activation, float*** input, int depth, int rows, int columns) {
+    #ifndef __CUDACC__
+    apply_function_input_cpu(activation, input, depth, rows, columns);
+    #else
+    apply_function_input_device(activation, input, depth, rows, columns);
+    #endif
+}
+
+void apply_function_to_matrix(int activation, float*** input, int depth, int dim) {
+    if (activation == SOFTMAX) {
+        return apply_softmax_input(input, depth, dim, dim);
+    }
+    if (activation >= 1) { // Exclude negative values (derivative)
+        return apply_function_input(activation, input, depth, dim, dim);
+    }
+    printf_error((char*)"fonction d'activation inconnue (apply_function_to_matrix): ");
+    printf("%d\n", activation);
+}
+
+
+void apply_function_to_vector(int activation, float*** input, int dim) {
+    if (activation == SOFTMAX) {
+        return apply_softmax_input(input, 1, 1, dim);
+    }
+    if (activation >= 1) { // Exclude negative values (derivative)
+        return apply_function_input(activation, input, 1, 1, dim);
+    }
+    printf_error((char*)"fonction d'activation inconnue (apply_function_to_vector): ");
+    printf("%d\n", activation);
+}
+
+
+funcPtr get_activation_function(int activation) {
+    switch (activation) {
+        case RELU:
+            return &relu;
+        case -RELU:
+            return &relu_derivative;
+
+        case IDENTITY:
+            return &identity;
+        case -IDENTITY:
+            return &identity_derivative;
+
+        case SIGMOID:
+            return &sigmoid;
+        case -SIGMOID:
+            return &sigmoid_derivative;
+
+        case LEAKY_RELU:
+            return &leaky_relu;
+        case -LEAKY_RELU:
+            return &leaky_relu_derivative;
+
+        case TANH:
+            return &tanh_;
+        case -TANH:
+            return &tanh_derivative;
+
+        case SOFTMAX:
+            printf_error((char*)"impossible de renvoyer la fonction softmax\n");
+            return NULL;
+        case -SOFTMAX:
+            printf_error((char*)"impossible de renvoyer la dérivée de la fonction softmax\n");
+            return NULL;
+
+        default:
+            printf_error((char*)"fonction d'activation inconnue (get_activation_function): ");
+            printf("%d\n", activation);
+            return NULL;
+    }
+}
+
+
+#ifdef __CUDACC__
+funcPtr get_activation_function_cuda(int activation) {
+    funcPtr host_function;
+
+    switch (activation) {
+        case RELU:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_relu, sizeof(funcPtr)));
+            break;
+        case -RELU:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_relu_derivative, sizeof(funcPtr)));
+            break;
+
+        case IDENTITY:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_identity, sizeof(funcPtr)));
+            break;
+        case -IDENTITY:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_identity_derivative, sizeof(funcPtr)));
+            break;
+
+        case SIGMOID:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_sigmoid, sizeof(funcPtr)));
+            break;
+        case -SIGMOID:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_sigmoid_derivative, sizeof(funcPtr)));
+            break;
+
+        case LEAKY_RELU:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_leaky_relu, sizeof(funcPtr)));
+            break;
+        case -LEAKY_RELU:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_leaky_relu_derivative, sizeof(funcPtr)));
+            break;
+
+        case TANH:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_tanh, sizeof(funcPtr)));
+            break;
+        case -TANH:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_tanh_derivative, sizeof(funcPtr)));
+            break;
+
+        case SOFTMAX:
+            printf_error((char*)"impossible de renvoyer la fonction softmax\n");
+            return NULL;
+        case -SOFTMAX:
+            printf_error((char*)"impossible de renvoyer la dérivée de la fonction softmax\n");
+            return NULL;
+
+        default:
+            printf_error((char*)"fonction d'activation inconnue (get_activation_function_cuda): ");
+            printf("%d\n", activation);
+            return NULL;
+    }
+    return host_function;
+}
+#endif
\ No newline at end of file
diff --git a/src/cnn/include/backpropagation.h b/src/cnn/include/backpropagation.h
index dea99ff..d44af8e 100644
--- a/src/cnn/include/backpropagation.h
+++ b/src/cnn/include/backpropagation.h
@@ -40,16 +40,16 @@ void backward_max_pooling(float*** input, float*** output, int input_width, int
 /*
 * Transfert les informations d'erreur à travers une couche fully connected
 */
-void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, int size_input, int size_output, ptr d_function, int is_first);
+void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, int size_input, int size_output, funcPtr d_function, int is_first);
 
 /*
 * Transfert les informations d'erreur à travers une couche de linéarisation
 */
-void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, ptr d_function);
+void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, funcPtr d_function);
 
 /*
 * Transfert les informations d'erreur à travers un couche de convolution
 */
-void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, ptr d_function, int is_first);
+void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, funcPtr d_function, int is_first);
 
 #endif
diff --git a/src/cnn/include/function.h b/src/cnn/include/function.h
index c09ccab..4234009 100644
--- a/src/cnn/include/function.h
+++ b/src/cnn/include/function.h
@@ -16,52 +16,132 @@
 #define RELU_CLIP_VALUE 15
 
-typedef float (*ptr)(float);
-typedef ptr (*pm)();
+typedef float (*funcPtr)(float);
 
+//* Identité
+#ifdef __CUDACC__
+__device__ float device_identity(float x);
+__device__ float device_identity_derivative(float x);
+#endif
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float identity(float x);
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float identity_derivative(float x);
 
+//* Sigmoid
+#ifdef __CUDACC__
+__device__ float device_sigmoid(float x);
+__device__ float device_sigmoid_derivative(float x);
+#endif
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float sigmoid(float x);
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float sigmoid_derivative(float x);
 
+//* RELU
+#ifdef __CUDACC__
+__device__ float device_relu(float x);
+__device__ float device_relu_derivative(float x);
+#endif
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float relu(float x);
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float relu_derivative(float x);
 
+//* Leaky RELU
+#ifdef __CUDACC__
+__device__ float device_leaky_relu(float x);
+__device__ float device_leaky_relu_derivative(float x);
+#endif
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float leaky_relu(float x);
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float leaky_relu_derivative(float x);
 
+//* Tanh
+#ifdef __CUDACC__
+__device__ float device_tanh_(float x);
+__device__ float device_tanh_derivative(float x);
+#endif
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float tanh_(float x);
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float tanh_derivative(float x);
 
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 /*
 * Applique softmax sur input[depth][rows][columns]
 */
 void apply_softmax_input(float ***input, int depth, int rows, int columns);
 
+#ifdef __CUDACC__
+extern "C"
+#endif
 /*
 * Applique la fonction f sur input[depth][rows][columns]
 */
-void apply_function_input(float (*f)(float), float*** input, int depth, int rows, int columns);
+void apply_function_input(int activation, float*** input, int depth, int rows, int columns);
 
+#ifdef __CUDACC__
+extern "C"
+#endif
 /*
 * Applique une fonction d'activation (repérée par son identifiant) à une matrice
 */
 void apply_function_to_matrix(int activation, float*** input, int depth, int dim);
 
+#ifdef __CUDACC__
+extern "C"
+#endif
 /*
 * Applique une fonction d'activation (repérée par son identifiant) à un vecteur
 */
 void apply_function_to_vector(int activation, float*** input, int dim);
 
+#ifdef __CUDACC__
+extern "C"
+#endif
 /*
 * Renvoie la fonction d'activation correspondant à son identifiant (activation)
 */
-ptr get_activation_function(int activation);
+funcPtr get_activation_function(int activation);
+
+/*
+* Récupère un pointeur sur le device vers la fonction d'activation demandée puis le transforme en pointeur sur l'host
+*/
+funcPtr get_activation_function_cuda(int activation);
 
 #endif
\ No newline at end of file
diff --git a/test/cnn_function.cu b/test/cnn_function.cu
new file mode 100644
index 0000000..931a50b
--- /dev/null
+++ b/test/cnn_function.cu
@@ -0,0 +1,60 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "../src/include/memory_management.h"
+#include "../src/cnn/include/function.h"
+#include "../src/include/colors.h"
+
+
+int main() {
+    printf("Initialisation\n");
+    // Initialise values
+    int depth = 10;
+    int rows = 10;
+    int columns = 10;
+
+    float*** input = (float***)nalloc(depth, sizeof(float**));
+    float*** input_initial = (float***)malloc(depth*sizeof(float**));
+    for (int i=0; i < depth; i++) {
+        input[i] = (float**)nalloc(rows, sizeof(float*));
+        input_initial[i] = (float**)malloc(rows*sizeof(float*));
+        for (int j=0; j < rows; j++) {
+            input[i][j] = (float*)nalloc(columns, sizeof(float));
+            input_initial[i][j] = (float*)malloc(columns*sizeof(float));
+            for (int k=0; k < columns; k++) {
+                input[i][j][k] = (float)rand()/RAND_MAX;
+                input_initial[i][j][k] = input[i][j][k];
+            }
+        }
+    }
+    printf(GREEN "OK\n" RESET);
+
+    funcPtr func = get_activation_function(TANH);
+
+    printf("Calcul par CUDA\n");
+    apply_function_input(TANH, input, depth, rows, columns);
+    printf(GREEN "OK\n" RESET);
+
+    printf("Vérification des résultats\n");
+    for (int i=0; i < depth; i++) {
+        for (int j=0; j < rows; j++) {
+            for (int k=0; k < columns; k++) {
+                if (fabs((*func)(input_initial[i][j][k]) - input[i][j][k]) > 1e-6) {
+                    printf_error((char*)"Les résultats ne coincident pas\n");
+                    printf("Différence %e\n", fabs((*func)(input_initial[i][j][k]) - input[i][j][k]));
+                    //exit(1);
+                }
+            }
+            gree(input[i][j]);
+            free(input_initial[i][j]);
+        }
+        gree(input[i]);
+        free(input_initial[i]);
+    }
+    gree(input);
+    free(input_initial);
+
+    printf(GREEN "OK\n" RESET);
+    return 0;
+}
\ No newline at end of file