Add function.cu

Port `apply_function_input` and `get_activation_function` to CUDA
augustin64 2023-03-26 17:14:07 +02:00
parent e4003aea28
commit 677de892e7
8 changed files with 751 additions and 62 deletions
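The heart of this change is the CUDA device-function-pointer pattern cited in the diff below (https://stackoverflow.com/a/15646771): host code cannot take the address of a __device__ function directly, so the address is stored in a file-scope __device__ pointer variable and copied back to the host with cudaMemcpyFromSymbol before being passed to a kernel as an ordinary argument. Below is a minimal sketch of that pattern, compiled with nvcc; the names square, ptr_square and map_kernel are illustrative only and not part of this repository.

#include <cstdio>

typedef float (*funcPtr)(float);

__device__ float square(float x) {
    return x * x;
}

// The host cannot take &square itself; it copies this device-side symbol instead.
__device__ funcPtr ptr_square = square;

__global__ void map_kernel(funcPtr f, float* data, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        data[i] = (*f)(data[i]);
}

int main() {
    float h[4] = {1.f, 2.f, 3.f, 4.f};
    float* d;
    cudaMalloc(&d, sizeof(h));
    cudaMemcpy(d, h, sizeof(h), cudaMemcpyHostToDevice);

    funcPtr f; // host-side copy of the device function pointer
    cudaMemcpyFromSymbol(&f, ptr_square, sizeof(funcPtr));

    map_kernel<<<1, 4>>>(f, d, 4);
    cudaDeviceSynchronize();

    cudaMemcpy(h, d, sizeof(h), cudaMemcpyDeviceToHost);
    for (int i = 0; i < 4; i++)
        printf("%f\n", h[i]);
    cudaFree(d);
    return 0;
}

get_activation_function_cuda in the diff performs the same cudaMemcpyFromSymbol lookup for each activation identifier before apply_function_input_kernel is launched.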


@@ -95,7 +95,7 @@ $(BUILDDIR)/cnn-main-cuda: $(BUILDDIR)/cnn_main.cuda.o \
$(BUILDDIR)/cnn_initialisation.cuda.o \
$(BUILDDIR)/cnn_cuda_make.o \
$(BUILDDIR)/cnn_neuron_io.cuda.o \
$(BUILDDIR)/cnn_function.cuda.o \
$(BUILDDIR)/cnn_cuda_function.o \
$(BUILDDIR)/cnn_utils.cuda.o \
$(BUILDDIR)/cnn_update.cuda.o \
$(BUILDDIR)/cnn_free.cuda.o \
@@ -223,4 +223,5 @@ $(CACHE_DIR)/mnist-reseau-cnn.bin: $(BUILDDIR)/cnn-main
#
clean:
rm -rf $(BUILDDIR)/*
rm -f $(CACHE_DIR)/*
#rm -f $(CACHE_DIR)/*


@@ -82,7 +82,7 @@ void backward_max_pooling(float*** input, float*** output, int input_width, int
}
}
void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, int size_input, int size_output, ptr d_function, int is_first) {
void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, int size_input, int size_output, funcPtr d_function, int is_first) {
// Bias
for (int j=0; j < size_output; j++) {
ker->d_bias[j] += output[j];
@@ -109,7 +109,7 @@ void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output,
}
}
void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, ptr d_function) {
void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, funcPtr d_function) {
// Bias
for (int j=0; j < size_output; j++) {
ker->d_bias[j] += output[j];
@@ -144,7 +144,7 @@ void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, fl
}
}
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, ptr d_function, int is_first) {
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, funcPtr d_function, int is_first) {
// Bias
for (int i=0; i < depth_output; i++) {
for (int j=0; j < dim_output; j++) {


@@ -251,10 +251,10 @@ void backward_propagation(Network* network, int wanted_number) {
if (k_i->cnn) { // Convolution
ptr d_f = get_activation_function(-activation);
funcPtr d_f = get_activation_function(-activation);
backward_convolution(k_i->cnn, input, input_z, output, input_depth, input_width, output_depth, output_width, d_f, i==0);
} else if (k_i->nn) { // Full connection
ptr d_f = get_activation_function(-activation);
funcPtr d_f = get_activation_function(-activation);
if (k_i->linearisation == DOESNT_LINEARISE) { // Vector -> Vector
backward_dense(k_i->nn, input[0][0], input_z[0][0], output[0][0], input_width, output_width, d_f, i==0);
} else { // Matrix -> vector


@@ -3,9 +3,26 @@
#include <float.h>
#include "../include/colors.h"
#include "../include/utils.h"
#include "include/function.h"
#define BLOCKSIZE_x 16
#define BLOCKSIZE_y 8
#define BLOCKSIZE_z 8
//* Identity
#ifdef __CUDACC__
__device__ float device_identity(float x) {
return x;
}
__device__ float device_identity_derivative(float x) {
(void)x;
return 1;
}
#endif
float identity(float x) {
return x;
}
@@ -16,6 +33,18 @@ float identity_derivative(float x) {
}
//* Sigmoid
#ifdef __CUDACC__
__device__ float device_sigmoid(float x) {
return 1/(1 + exp(-x));
}
__device__ float device_sigmoid_derivative(float x) {
float tmp = exp(-x);
return tmp/((1+tmp)*(1+tmp));
}
#endif
float sigmoid(float x) {
return 1/(1 + exp(-x));
}
@@ -26,6 +55,19 @@ float sigmoid_derivative(float x) {
}
//* RELU
#ifdef __CUDACC__
__device__ float device_relu(float x) {
return fmaxf(0, fminf(x, RELU_CLIP_VALUE));
}
__device__ float device_relu_derivative(float x) {
if (x > 0)
return 1;
return 0;
}
#endif
float relu(float x) {
return fmaxf(0, fminf(x, RELU_CLIP_VALUE));
}
@@ -37,6 +79,21 @@ float relu_derivative(float x) {
}
//* Leaky RELU
#ifdef __CUDACC__
__device__ float device_leaky_relu(float x) {
if (x>0)
return fminf(x, RELU_CLIP_VALUE);
return x*LEAKER;
}
__device__ float device_leaky_relu_derivative(float x) {
if (x > 0)
return 1;
return LEAKER;
}
#endif
float leaky_relu(float x) {
if (x>0)
return fminf(x, RELU_CLIP_VALUE);
@@ -50,6 +107,22 @@ float leaky_relu_derivative(float x) {
}
//* Tanh
#ifdef __CUDACC__
__device__ float device_tanh_(float x) {
return tanh(x);
}
__device__ float device_tanh_derivative(float x) {
float a = tanh(x);
return 1 - a*a;
}
#endif
float tanh_(float x) {
return tanh(x);
}
@@ -60,6 +133,28 @@ float tanh_derivative(float x) {
}
#ifdef __CUDACC__
/*
* Definition of the function pointers for CUDA
* see https://stackoverflow.com/a/15646771
*/
__device__ funcPtr ptr_sigmoid = device_sigmoid;
__device__ funcPtr ptr_relu = device_relu;
__device__ funcPtr ptr_leaky_relu = device_leaky_relu;
__device__ funcPtr ptr_tanh = device_tanh_;
__device__ funcPtr ptr_identity = device_identity;
__device__ funcPtr ptr_identity_derivative = device_identity_derivative;
__device__ funcPtr ptr_sigmoid_derivative = device_sigmoid_derivative;
__device__ funcPtr ptr_relu_derivative = device_relu_derivative;
__device__ funcPtr ptr_leaky_relu_derivative = device_leaky_relu_derivative;
__device__ funcPtr ptr_tanh_derivative = device_tanh_derivative;
#endif
void apply_softmax_input(float ***input, int depth, int rows, int columns) {
float m = -FLT_MAX;
float sum=0;
@@ -88,7 +183,41 @@ void apply_softmax_input(float ***input, int depth, int rows, int columns) {
}
void apply_function_input(float (*f)(float), float*** input, int depth, int rows, int columns) {
/*
* Apply function on input
*/
#ifdef __CUDACC__
__global__ void apply_function_input_kernel(funcPtr f, float*** input, int depth, int rows, int columns) {
// Respective equivalents of i, j and k in the loop performed by the CPU
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < rows
int idz = threadIdx.z + blockDim.z*blockIdx.z; // < columns
if (idx >= depth || idy >= rows || idz >= columns) {
return;
}
input[idx][idy][idz] = (*f)(input[idx][idy][idz]);
}
void apply_function_input_device(int activation, float*** input, int depth, int rows, int columns) {
// Make computation
dim3 gridSize(i_div_up(depth, BLOCKSIZE_x), i_div_up(rows, BLOCKSIZE_y), i_div_up(columns, BLOCKSIZE_z));
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
funcPtr activation_function = get_activation_function_cuda(activation);
apply_function_input_kernel<<<gridSize, blockSize>>>(activation_function, input, depth, rows, columns);
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() );
}
#endif
void apply_function_input_cpu(int activation, float*** input, int depth, int rows, int columns) {
funcPtr f = get_activation_function(activation);
for (int i=0; i < depth; i++) {
for (int j=0; j < rows; j++) {
for (int k=0; k < columns; k++) {
@@ -98,15 +227,25 @@ void apply_function_input(float (*f)(float), float*** input, int depth, int rows
}
}
#ifdef __CUDACC__
extern "C"
#endif
void apply_function_input(int activation, float*** input, int depth, int rows, int columns) {
#ifndef __CUDACC__
apply_function_input_cpu(activation, input, depth, rows, columns);
#else
apply_function_input_device(activation, input, depth, rows, columns);
#endif
}
void apply_function_to_matrix(int activation, float*** input, int depth, int dim) {
if (activation == SOFTMAX) {
return apply_softmax_input(input, depth, dim, dim);
}
if (activation >= 1) { // Exclude negative values (derivative)
ptr f = get_activation_function(activation);
return apply_function_input(f, input, depth, dim, dim);
return apply_function_input(activation, input, depth, dim, dim);
}
printf_error("fonction d'activation inconnue (apply_function_to_matrix): ");
printf_error((char*)"fonction d'activation inconnue (apply_function_to_matrix): ");
printf("%d\n", activation);
}
@@ -116,59 +255,107 @@ void apply_function_to_vector(int activation, float*** input, int dim) {
return apply_softmax_input(input, 1, 1, dim);
}
if (activation >= 1) { // Exclude negative values (derivative)
ptr f = get_activation_function(activation);
return apply_function_input(f, input, 1, 1, dim);
return apply_function_input(activation, input, 1, 1, dim);
}
printf_error("fonction d'activation inconnue (apply_function_to_vector): ");
printf_error((char*)"fonction d'activation inconnue (apply_function_to_vector): ");
printf("%d\n", activation);
}
ptr get_activation_function(int activation) {
if (activation == RELU) {
funcPtr get_activation_function(int activation) {
switch (activation) {
case RELU:
return &relu;
}
if (activation == -RELU) {
case -RELU:
return &relu_derivative;
}
if (activation == IDENTITY) {
case IDENTITY:
return &identity;
}
if (activation == -IDENTITY) {
case -IDENTITY:
return &identity_derivative;
}
if (activation == SIGMOID) {
case SIGMOID:
return &sigmoid;
}
if (activation == -SIGMOID) {
case -SIGMOID:
return &sigmoid_derivative;
}
if (activation == SOFTMAX) {
printf_error("impossible de renvoyer la fonction softmax\n");
return NULL;
}
if (activation == -SOFTMAX) {
printf_error("impossible de renvoyer la dérivée de la fonction softmax\n");
return NULL;
}
if (activation == TANH) {
return &tanh_;
}
if (activation == -TANH) {
return &tanh_derivative;
}
if (activation == LEAKY_RELU) {
case LEAKY_RELU:
return &leaky_relu;
}
if (activation == -LEAKY_RELU) {
case -LEAKY_RELU:
return &leaky_relu_derivative;
}
printf_error("fonction d'activation inconnue (get_activation_function): ");
case TANH:
return &tanh_;
case -TANH:
return &tanh_derivative;
case SOFTMAX:
printf_error((char*)"impossible de renvoyer la fonction softmax\n");
return NULL;
case -SOFTMAX:
printf_error((char*)"impossible de renvoyer la dérivée de la fonction softmax\n");
return NULL;
default:
printf_error((char*)"fonction d'activation inconnue (get_activation_function_cuda): ");
printf("%d\n", activation);
return NULL;
}
}
#ifdef __CUDACC__
funcPtr get_activation_function_cuda(int activation) {
funcPtr host_function;
switch (activation) {
case RELU:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_relu, sizeof(funcPtr)));
break;
case -RELU:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_relu_derivative, sizeof(funcPtr)));
break;
case IDENTITY:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_identity, sizeof(funcPtr)));
break;
case -IDENTITY:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_identity_derivative, sizeof(funcPtr)));
break;
case SIGMOID:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_sigmoid, sizeof(funcPtr)));
break;
case -SIGMOID:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_sigmoid_derivative, sizeof(funcPtr)));
break;
case LEAKY_RELU:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_leaky_relu, sizeof(funcPtr)));
break;
case -LEAKY_RELU:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_leaky_relu_derivative, sizeof(funcPtr)));
break;
case TANH:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_tanh, sizeof(funcPtr)));
break;
case -TANH:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_tanh_derivative, sizeof(funcPtr)));
break;
case SOFTMAX:
printf_error((char*)"impossible de renvoyer la fonction softmax\n");
return NULL;
case -SOFTMAX:
printf_error((char*)"impossible de renvoyer la dérivée de la fonction softmax\n");
return NULL;
default:
printf_error((char*)"fonction d'activation inconnue (get_activation_function_cuda): ");
printf("%d\n", activation);
return NULL;
}
return host_function;
}
#endif
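A quick check on the launch configuration in apply_function_input_device: BLOCKSIZE_x × BLOCKSIZE_y × BLOCKSIZE_z = 16 × 8 × 8 = 1024 threads per block, which is the per-block maximum on current GPUs. Assuming i_div_up is the ceiling-division helper its use suggests, the 10 × 10 × 10 input built in test/cnn_function.cu gives gridSize = (ceil(10/16), ceil(10/8), ceil(10/8)) = (1, 2, 2), i.e. 4 blocks, and the bounds check at the top of apply_function_input_kernel discards the surplus threads.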

src/cnn/function.cu (new file, 361 lines)

@@ -0,0 +1,361 @@
#include <stdio.h>
#include <math.h>
#include <float.h>
#include "../include/colors.h"
#include "../include/utils.h"
#include "include/function.h"
#define BLOCKSIZE_x 16
#define BLOCKSIZE_y 8
#define BLOCKSIZE_z 8
//* Identity
#ifdef __CUDACC__
__device__ float device_identity(float x) {
return x;
}
__device__ float device_identity_derivative(float x) {
(void)x;
return 1;
}
#endif
float identity(float x) {
return x;
}
float identity_derivative(float x) {
(void)x;
return 1;
}
//* Sigmoid
#ifdef __CUDACC__
__device__ float device_sigmoid(float x) {
return 1/(1 + exp(-x));
}
__device__ float device_sigmoid_derivative(float x) {
float tmp = exp(-x);
return tmp/((1+tmp)*(1+tmp));
}
#endif
float sigmoid(float x) {
return 1/(1 + exp(-x));
}
float sigmoid_derivative(float x) {
float tmp = exp(-x);
return tmp/((1+tmp)*(1+tmp));
}
//* RELU
#ifdef __CUDACC__
__device__ float device_relu(float x) {
return fmaxf(0, fminf(x, RELU_CLIP_VALUE));
}
__device__ float device_relu_derivative(float x) {
if (x > 0)
return 1;
return 0;
}
#endif
float relu(float x) {
return fmaxf(0, fminf(x, RELU_CLIP_VALUE));
}
float relu_derivative(float x) {
if (x > 0)
return 1;
return 0;
}
//* Leaky RELU
#ifdef __CUDACC__
__device__ float device_leaky_relu(float x) {
if (x>0)
return fminf(x, RELU_CLIP_VALUE);
return x*LEAKER;
}
__device__ float device_leaky_relu_derivative(float x) {
if (x > 0)
return 1;
return LEAKER;
}
#endif
float leaky_relu(float x) {
if (x>0)
return fminf(x, RELU_CLIP_VALUE);
return x*LEAKER;
}
float leaky_relu_derivative(float x) {
if (x > 0)
return 1;
return LEAKER;
}
//* Tanh
#ifdef __CUDACC__
__device__ float device_tanh_(float x) {
return tanh(x);
}
__device__ float device_tanh_derivative(float x) {
float a = tanh(x);
return 1 - a*a;
}
#endif
float tanh_(float x) {
return tanh(x);
}
float tanh_derivative(float x) {
float a = tanh(x);
return 1 - a*a;
}
#ifdef __CUDACC__
/*
* Definition of the function pointers for CUDA
* see https://stackoverflow.com/a/15646771
*/
__device__ funcPtr ptr_sigmoid = device_sigmoid;
__device__ funcPtr ptr_relu = device_relu;
__device__ funcPtr ptr_leaky_relu = device_leaky_relu;
__device__ funcPtr ptr_tanh = device_tanh_;
__device__ funcPtr ptr_identity = device_identity;
__device__ funcPtr ptr_identity_derivative = device_identity_derivative;
__device__ funcPtr ptr_sigmoid_derivative = device_sigmoid_derivative;
__device__ funcPtr ptr_relu_derivative = device_relu_derivative;
__device__ funcPtr ptr_leaky_relu_derivative = device_leaky_relu_derivative;
__device__ funcPtr ptr_tanh_derivative = device_tanh_derivative;
#endif
void apply_softmax_input(float ***input, int depth, int rows, int columns) {
float m = -FLT_MAX;
float sum=0;
for (int i=0; i < depth; i++) {
for (int j=0; j < rows; j++) {
for (int k=0; k < columns; k++) {
m = fmaxf(m, input[i][j][k]);
}
}
}
for (int i=0; i < depth; i++) {
for (int j=0; j < rows; j++) {
for (int k=0; k < columns; k++) {
input[i][j][k] = exp(input[i][j][k]-m);
sum += input[i][j][k];
}
}
}
for (int i=0; i < depth; i++) {
for (int j=0; j < rows; j++) {
for (int k=0; k < columns; k++) {
input[i][j][k] = input[i][j][k]/sum;
}
}
}
}
/*
* Apply function on input
*/
#ifdef __CUDACC__
__global__ void apply_function_input_kernel(funcPtr f, float*** input, int depth, int rows, int columns) {
// Respective equivalents of i, j and k in the loop performed by the CPU
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < rows
int idz = threadIdx.z + blockDim.z*blockIdx.z; // < columns
if (idx >= depth || idy >= rows || idz >= columns) {
return;
}
input[idx][idy][idz] = (*f)(input[idx][idy][idz]);
}
void apply_function_input_device(int activation, float*** input, int depth, int rows, int columns) {
// Make computation
dim3 gridSize(i_div_up(depth, BLOCKSIZE_x), i_div_up(rows, BLOCKSIZE_y), i_div_up(columns, BLOCKSIZE_z));
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
funcPtr activation_function = get_activation_function_cuda(activation);
apply_function_input_kernel<<<gridSize, blockSize>>>(activation_function, input, depth, rows, columns);
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() );
}
#endif
void apply_function_input_cpu(int activation, float*** input, int depth, int rows, int columns) {
funcPtr f = get_activation_function(activation);
for (int i=0; i < depth; i++) {
for (int j=0; j < rows; j++) {
for (int k=0; k < columns; k++) {
input[i][j][k] = (*f)(input[i][j][k]);
}
}
}
}
#ifdef __CUDACC__
extern "C"
#endif
void apply_function_input(int activation, float*** input, int depth, int rows, int columns) {
#ifndef __CUDACC__
apply_function_input_cpu(activation, input, depth, rows, columns);
#else
apply_function_input_device(activation, input, depth, rows, columns);
#endif
}
void apply_function_to_matrix(int activation, float*** input, int depth, int dim) {
if (activation == SOFTMAX) {
return apply_softmax_input(input, depth, dim, dim);
}
if (activation >= 1) { // Exclude negative values (derivative)
return apply_function_input(activation, input, depth, dim, dim);
}
printf_error((char*)"fonction d'activation inconnue (apply_function_to_matrix): ");
printf("%d\n", activation);
}
void apply_function_to_vector(int activation, float*** input, int dim) {
if (activation == SOFTMAX) {
return apply_softmax_input(input, 1, 1, dim);
}
if (activation >= 1) { // Exclude negative values (derivative)
return apply_function_input(activation, input, 1, 1, dim);
}
printf_error((char*)"fonction d'activation inconnue (apply_function_to_vector): ");
printf("%d\n", activation);
}
funcPtr get_activation_function(int activation) {
switch (activation) {
case RELU:
return &relu;
case -RELU:
return &relu_derivative;
case IDENTITY:
return &identity;
case -IDENTITY:
return &identity_derivative;
case SIGMOID:
return &sigmoid;
case -SIGMOID:
return &sigmoid_derivative;
case LEAKY_RELU:
return &leaky_relu;
case -LEAKY_RELU:
return &leaky_relu_derivative;
case TANH:
return &tanh_;
case -TANH:
return &tanh_derivative;
case SOFTMAX:
printf_error((char*)"impossible de renvoyer la fonction softmax\n");
return NULL;
case -SOFTMAX:
printf_error((char*)"impossible de renvoyer la dérivée de la fonction softmax\n");
return NULL;
default:
printf_error((char*)"fonction d'activation inconnue (get_activation_function_cuda): ");
printf("%d\n", activation);
return NULL;
}
}
#ifdef __CUDACC__
funcPtr get_activation_function_cuda(int activation) {
funcPtr host_function;
switch (activation) {
case RELU:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_relu, sizeof(funcPtr)));
break;
case -RELU:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_relu_derivative, sizeof(funcPtr)));
break;
case IDENTITY:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_identity, sizeof(funcPtr)));
break;
case -IDENTITY:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_identity_derivative, sizeof(funcPtr)));
break;
case SIGMOID:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_sigmoid, sizeof(funcPtr)));
break;
case -SIGMOID:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_sigmoid_derivative, sizeof(funcPtr)));
break;
case LEAKY_RELU:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_leaky_relu, sizeof(funcPtr)));
break;
case -LEAKY_RELU:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_leaky_relu_derivative, sizeof(funcPtr)));
break;
case TANH:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_tanh, sizeof(funcPtr)));
break;
case -TANH:
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_tanh_derivative, sizeof(funcPtr)));
break;
case SOFTMAX:
printf_error((char*)"impossible de renvoyer la fonction softmax\n");
return NULL;
case -SOFTMAX:
printf_error((char*)"impossible de renvoyer la dérivée de la fonction softmax\n");
return NULL;
default:
printf_error((char*)"fonction d'activation inconnue (get_activation_function_cuda): ");
printf("%d\n", activation);
return NULL;
}
return host_function;
}
#endif


@@ -40,16 +40,16 @@ void backward_max_pooling(float*** input, float*** output, int input_width, int
/*
* Transfers the error information through a fully connected layer
*/
void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, int size_input, int size_output, ptr d_function, int is_first);
void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, int size_input, int size_output, funcPtr d_function, int is_first);
/*
* Transfers the error information through a linearisation layer
*/
void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, ptr d_function);
void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, funcPtr d_function);
/*
* Transfers the error information through a convolution layer
*/
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, ptr d_function, int is_first);
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, funcPtr d_function, int is_first);
#endif


@@ -16,52 +16,132 @@
#define RELU_CLIP_VALUE 15
typedef float (*ptr)(float);
typedef ptr (*pm)();
typedef float (*funcPtr)(float);
//* Identity
#ifdef __CUDACC__
__device__ float device_identity(float x);
__device__ float device_identity_derivative(float x);
#endif
#ifdef __CUDACC__
extern "C"
#endif
float identity(float x);
#ifdef __CUDACC__
extern "C"
#endif
float identity_derivative(float x);
//* Sigmoid
#ifdef __CUDACC__
__device__ float device_sigmoid(float x);
__device__ float device_sigmoid_derivative(float x);
#endif
#ifdef __CUDACC__
extern "C"
#endif
float sigmoid(float x);
#ifdef __CUDACC__
extern "C"
#endif
float sigmoid_derivative(float x);
//* RELU
#ifdef __CUDACC__
__device__ float device_relu(float x);
__device__ float device_relu_derivative(float x);
#endif
#ifdef __CUDACC__
extern "C"
#endif
float relu(float x);
#ifdef __CUDACC__
extern "C"
#endif
float relu_derivative(float x);
//* Leaky RELU
#ifdef __CUDACC__
__device__ float device_leaky_relu(float x);
__device__ float device_leaky_relu_derivative(float x);
#endif
#ifdef __CUDACC__
extern "C"
#endif
float leaky_relu(float x);
#ifdef __CUDACC__
extern "C"
#endif
float leaky_relu_derivative(float x);
//* Tanh
#ifdef __CUDACC__
__device__ float device_tanh_(float x);
__device__ float device_tanh_derivative(float x);
#endif
#ifdef __CUDACC__
extern "C"
#endif
float tanh_(float x);
#ifdef __CUDACC__
extern "C"
#endif
float tanh_derivative(float x);
#ifdef __CUDACC__
extern "C"
#endif
/*
* Applies softmax to input[depth][rows][columns]
*/
void apply_softmax_input(float ***input, int depth, int rows, int columns);
#ifdef __CUDACC__
extern "C"
#endif
/*
* Applies the activation function to input[depth][rows][columns]
*/
void apply_function_input(float (*f)(float), float*** input, int depth, int rows, int columns);
void apply_function_input(int activation, float*** input, int depth, int rows, int columns);
#ifdef __CUDACC__
extern "C"
#endif
/*
* Applies an activation function (identified by activation) to a matrix
*/
void apply_function_to_matrix(int activation, float*** input, int depth, int dim);
#ifdef __CUDACC__
extern "C"
#endif
/*
* Applies an activation function (identified by activation) to a vector
*/
void apply_function_to_vector(int activation, float*** input, int dim);
#ifdef __CUDACC__
extern "C"
#endif
/*
* Returns the activation function matching its identifier (activation)
*/
ptr get_activation_function(int activation);
funcPtr get_activation_function(int activation);
/*
* Retrieves a device-side pointer to the requested activation function and converts it into a host-usable pointer
*/
funcPtr get_activation_function_cuda(int activation);
#endif

test/cnn_function.cu (new file, 60 lines)

@@ -0,0 +1,60 @@
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include "../src/include/memory_management.h"
#include "../src/cnn/include/function.h"
#include "../src/include/colors.h"
int main() {
printf("Initialisation\n");
// Initialise values
int depth = 10;
int rows = 10;
int columns = 10;
float*** input = (float***)nalloc(depth, sizeof(float**));
float*** input_initial = (float***)malloc(depth*sizeof(float**));
for (int i=0; i < depth; i++) {
input[i] = (float**)nalloc(rows, sizeof(float*));
input_initial[i] = (float**)malloc(rows*sizeof(float*));
for (int j=0; j < rows; j++) {
input[i][j] = (float*)nalloc(columns, sizeof(float));
input_initial[i][j] = (float*)malloc(columns*sizeof(float));
for (int k=0; k < columns; k++) {
input[i][j][k] = (float)rand()/RAND_MAX;
input_initial[i][j][k] = input[i][j][k];
}
}
}
printf(GREEN "OK\n" RESET);
funcPtr func = get_activation_function(TANH);
printf("Calcul par CUDA\n");
apply_function_input(TANH, input, depth, rows, columns);
printf(GREEN "OK\n" RESET);
printf("Vérification des résultats\n");
for (int i=0; i < depth; i++) {
for (int j=0; j < rows; j++) {
for (int k=0; k < columns; k++) {
if (fabs((*func)(input_initial[i][j][k]) - input[i][j][k]) > 1e-6) {
printf_error((char*)"Les résultats ne coincident pas\n");
printf("Différence %e\n", fabs((*func)(input_initial[i][j][k]) - input[i][j][k]));
//exit(1);
}
}
gree(input[i][j]);
free(input_initial[i][j]);
}
gree(input[i]);
free(input_initial[i]);
}
gree(input);
free(input_initial);
printf(GREEN "OK\n" RESET);
return 0;
}