From 677de892e7403571385ebf737727e6c6641051f3 Mon Sep 17 00:00:00 2001
From: augustin64
Date: Sun, 26 Mar 2023 17:14:07 +0200
Subject: [PATCH] Add function.cu

Translate `apply_function_input` and `get_activation_function` to CUDA
---
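Usage note (illustrative sketch; `m` below is a hypothetical float*** buffer of
size 3x3x3): call sites no longer fetch the activation function pointer
themselves, they pass the activation identifier and apply_function_input()
selects the CPU or CUDA path at compile time.

    // Before: resolve the pointer, then apply it
    //   apply_function_input(get_activation_function(RELU), m, 3, 3, 3);
    // After: pass the identifier directly
    apply_function_input(RELU, m, 3, 3, 3);

On the CUDA path the kernel is launched with 16x8x8-thread blocks and a grid
sized by i_div_up (assumed to be ceiling division); e.g. a 10x10x10 input, as
in test/cnn_function.cu, gives a 1x2x2 grid.
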
 Makefile                          |   5 +-
 src/cnn/backpropagation.c         |   6 +-
 src/cnn/cnn.c                     |   4 +-
 src/cnn/function.c                | 283 +++++++++++++++++++----
 src/cnn/function.cu               | 361 ++++++++++++++++++++++++++++++
 src/cnn/include/backpropagation.h |   6 +-
 src/cnn/include/function.h        |  88 +++++++-
 test/cnn_function.cu              |  60 +++++
 8 files changed, 751 insertions(+), 62 deletions(-)
 create mode 100644 src/cnn/function.cu
 create mode 100644 test/cnn_function.cu

diff --git a/Makefile b/Makefile
index 0c40477..5b4a3b2 100644
--- a/Makefile
+++ b/Makefile
@@ -95,7 +95,7 @@ $(BUILDDIR)/cnn-main-cuda: $(BUILDDIR)/cnn_main.cuda.o \
 		$(BUILDDIR)/cnn_initialisation.cuda.o \
 		$(BUILDDIR)/cnn_cuda_make.o \
 		$(BUILDDIR)/cnn_neuron_io.cuda.o \
-		$(BUILDDIR)/cnn_function.cuda.o \
+		$(BUILDDIR)/cnn_cuda_function.o \
 		$(BUILDDIR)/cnn_utils.cuda.o \
 		$(BUILDDIR)/cnn_update.cuda.o \
 		$(BUILDDIR)/cnn_free.cuda.o \
@@ -223,4 +223,5 @@ $(CACHE_DIR)/mnist-reseau-cnn.bin: $(BUILDDIR)/cnn-main
 #
 clean:
 	rm -rf $(BUILDDIR)/*
-	rm -f $(CACHE_DIR)/*
\ No newline at end of file
+
+#rm -f $(CACHE_DIR)/*
\ No newline at end of file
diff --git a/src/cnn/backpropagation.c b/src/cnn/backpropagation.c
index 3b48556..758842c 100644
--- a/src/cnn/backpropagation.c
+++ b/src/cnn/backpropagation.c
@@ -82,7 +82,7 @@ void backward_max_pooling(float*** input, float*** output, int input_width, int
     }
 }
 
-void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, int size_input, int size_output, ptr d_function, int is_first) {
+void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, int size_input, int size_output, funcPtr d_function, int is_first) {
     // Bias
     for (int j=0; j < size_output; j++) {
         ker->d_bias[j] += output[j];
@@ -109,7 +109,7 @@ void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output,
     }
 }
 
-void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, ptr d_function) {
+void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, funcPtr d_function) {
     // Bias
     for (int j=0; j < size_output; j++) {
         ker->d_bias[j] += output[j];
@@ -144,7 +144,7 @@ void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, fl
     }
 }
 
-void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, ptr d_function, int is_first) {
+void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, funcPtr d_function, int is_first) {
     // Bias
     for (int i=0; i < depth_output; i++) {
         for (int j=0; j < dim_output; j++) {
diff --git a/src/cnn/cnn.c b/src/cnn/cnn.c
index abc8357..fc4894b 100644
--- a/src/cnn/cnn.c
+++ b/src/cnn/cnn.c
@@ -251,10 +251,10 @@ void backward_propagation(Network* network, int wanted_number) {
 
         if (k_i->cnn) { // Convolution
-            ptr d_f = get_activation_function(-activation);
+            funcPtr d_f = get_activation_function(-activation);
             backward_convolution(k_i->cnn, input, input_z, output, input_depth, input_width, output_depth, output_width, d_f, i==0);
         } else if (k_i->nn) { // Full connection
-            ptr d_f = get_activation_function(-activation);
+            funcPtr d_f = get_activation_function(-activation);
             if (k_i->linearisation == DOESNT_LINEARISE) { // Vecteur -> Vecteur
                 backward_dense(k_i->nn, input[0][0], input_z[0][0], output[0][0], input_width, output_width, d_f, i==0);
             } else { // Matrice -> vecteur
diff --git a/src/cnn/function.c b/src/cnn/function.c
index 0143731..e362e42 100644
--- a/src/cnn/function.c
+++ b/src/cnn/function.c
@@ -3,9 +3,26 @@
 #include <math.h>
 
 #include "../include/colors.h"
+#include "../include/utils.h"
 
 #include "include/function.h"
 
+#define BLOCKSIZE_x 16
+#define BLOCKSIZE_y 8
+#define BLOCKSIZE_z 8
+
+//* Identity
+#ifdef __CUDACC__
+__device__ float device_identity(float x) {
+    return x;
+}
+
+__device__ float device_identity_derivative(float x) {
+    (void)x;
+    return 1;
+}
+#endif
+
 float identity(float x) {
     return x;
 }
 
 float identity_derivative(float x) {
     (void)x;
     return 1;
 }
 
 
+//* Sigmoid
+#ifdef __CUDACC__
+__device__ float device_sigmoid(float x) {
+    return 1/(1 + exp(-x));
+}
+
+__device__ float device_sigmoid_derivative(float x) {
+    float tmp = exp(-x);
+    return tmp/((1+tmp)*(1+tmp));
+}
+#endif
+
 float sigmoid(float x) {
     return 1/(1 + exp(-x));
 }
 
 float sigmoid_derivative(float x) {
     float tmp = exp(-x);
     return tmp/((1+tmp)*(1+tmp));
 }
 
 
+//* RELU
+#ifdef __CUDACC__
+__device__ float device_relu(float x) {
+    return fmaxf(0, fminf(x, RELU_CLIP_VALUE));
+}
+
+__device__ float device_relu_derivative(float x) {
+    if (x > 0)
+        return 1;
+    return 0;
+}
+#endif
+
 float relu(float x) {
     return fmaxf(0, fminf(x, RELU_CLIP_VALUE));
 }
 
 float relu_derivative(float x) {
     if (x > 0)
         return 1;
     return 0;
 }
 
 
+//* Leaky RELU
+#ifdef __CUDACC__
+__device__ float device_leaky_relu(float x) {
+    if (x>0)
+        return fminf(x, RELU_CLIP_VALUE);
+    return x*LEAKER;
+}
+
+__device__ float device_leaky_relu_derivative(float x) {
+    if (x > 0)
+        return 1;
+    return LEAKER;
+}
+#endif
+
 float leaky_relu(float x) {
     if (x>0)
         return fminf(x, RELU_CLIP_VALUE);
     return x*LEAKER;
 }
 
 float leaky_relu_derivative(float x) {
     if (x > 0)
         return 1;
     return LEAKER;
 }
 
 
+//* Tanh
+#ifdef __CUDACC__
+__device__
+#endif
+float device_tanh_(float x) {
+    return tanh(x);
+}
+
+#ifdef __CUDACC__
+__device__
+#endif
+float device_tanh_derivative(float x) {
+    float a = tanh(x);
+    return 1 - a*a;
+}
+
 float tanh_(float x) {
     return tanh(x);
 }
 
 float tanh_derivative(float x) {
     float a = tanh(x);
     return 1 - a*a;
 }
 
 
+
+
+#ifdef __CUDACC__
+/*
+ * Définition des pointeurs de fonctions pour CUDA
+ * voir https://stackoverflow.com/a/15646771
+*/
+__device__ funcPtr ptr_sigmoid = device_sigmoid;
+__device__ funcPtr ptr_relu = device_relu;
+__device__ funcPtr ptr_leaky_relu = device_leaky_relu;
+__device__ funcPtr ptr_tanh = device_tanh_;
+__device__ funcPtr ptr_identity = device_identity;
+
+__device__ funcPtr ptr_identity_derivative = device_identity_derivative;
+__device__ funcPtr ptr_sigmoid_derivative = device_sigmoid_derivative;
+__device__ funcPtr ptr_relu_derivative = device_relu_derivative;
+__device__ funcPtr ptr_leaky_relu_derivative = device_leaky_relu_derivative;
+__device__ funcPtr ptr_tanh_derivative = device_tanh_derivative;
+#endif
+
+
+
 void apply_softmax_input(float ***input, int depth, int rows, int columns) {
     float m = -FLT_MAX;
     float sum=0;
     for (int i=0; i < depth; i++) {
         for (int j=0; j < rows; j++) {
             for (int k=0; k < columns; k++) {
                 m = fmaxf(m, input[i][j][k]);
             }
         }
     }
     for (int i=0; i < depth; i++) {
         for (int j=0; j < rows; j++) {
             for (int k=0; k < columns; k++) {
                 input[i][j][k] = exp(m-input[i][j][k]);
                 sum += input[i][j][k];
             }
         }
     }
     for (int i=0; i < depth; i++) {
         for (int j=0; j < rows; j++) {
             for (int k=0; k < columns; k++) {
                 input[i][j][k] = input[i][j][k]/sum;
             }
         }
     }
 }
 
 
-void apply_function_input(float (*f)(float), float*** input, int depth, int rows, int columns) {
+/*
+* Apply function on input
+*/
+#ifdef __CUDACC__
+__global__ void apply_function_input_kernel(funcPtr f, float*** input, int depth, int rows, int columns) {
+    // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
+    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth
+    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < rows
+    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < columns
+
+    if (idx >= depth || idy >= rows || idz >= columns) {
+        return;
+    }
+
+    input[idx][idy][idz] = (*f)(input[idx][idy][idz]);
+}
+
+
+void apply_function_input_device(int activation, float*** input, int depth, int rows, int columns) {
+    // Make computation
+    dim3 gridSize(i_div_up(depth, BLOCKSIZE_x), i_div_up(rows, BLOCKSIZE_y), i_div_up(columns, BLOCKSIZE_z));
+    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
+
+    funcPtr activation_function = get_activation_function_cuda(activation);
+
+    apply_function_input_kernel<<<gridSize, blockSize>>>(activation_function, input, depth, rows, columns);
+
+    gpuErrchk( cudaPeekAtLastError() );
+    gpuErrchk( cudaDeviceSynchronize() );
+}
+#endif
+
+void apply_function_input_cpu(int activation, float*** input, int depth, int rows, int columns) {
+    funcPtr f = get_activation_function(activation);
+
     for (int i=0; i < depth; i++) {
         for (int j=0; j < rows; j++) {
             for (int k=0; k < columns; k++) {
                 input[i][j][k] = (*f)(input[i][j][k]);
             }
         }
     }
 }
 
+#ifdef __CUDACC__
+extern "C"
+#endif
+void apply_function_input(int activation, float*** input, int depth, int rows, int columns) {
+    #ifndef __CUDACC__
+    apply_function_input_cpu(activation, input, depth, rows, columns);
+    #else
+    apply_function_input_device(activation, input, depth, rows, columns);
+    #endif
+}
+
 void apply_function_to_matrix(int activation, float*** input, int depth, int dim) {
     if (activation == SOFTMAX) {
         return apply_softmax_input(input, depth, dim, dim);
     }
     if (activation >= 1) { // Exclude negative values (derivative)
-        ptr f = get_activation_function(activation);
-        return apply_function_input(f, input, depth, dim, dim);
+        return apply_function_input(activation, input, depth, dim, dim);
     }
-    printf_error("fonction d'activation inconnue (apply_function_to_matrix): ");
+    printf_error((char*)"fonction d'activation inconnue (apply_function_to_matrix): ");
     printf("%d\n", activation);
 }
 
 
 void apply_function_to_vector(int activation, float*** input, int dim) {
     if (activation == SOFTMAX) {
         return apply_softmax_input(input, 1, 1, dim);
     }
     if (activation >= 1) { // Exclude negative values (derivative)
-        ptr f = get_activation_function(activation);
-        return apply_function_input(f, input, 1, 1, dim);
+        return apply_function_input(activation, input, 1, 1, dim);
     }
-    printf_error("fonction d'activation inconnue (apply_function_to_vector): ");
+    printf_error((char*)"fonction d'activation inconnue (apply_function_to_vector): ");
     printf("%d\n", activation);
 }
 
 
-ptr get_activation_function(int activation) {
-    if (activation == RELU) {
-        return &relu;
-    }
-    if (activation == -RELU) {
-        return &relu_derivative;
-    }
+funcPtr get_activation_function(int activation) {
+    switch (activation) {
+        case RELU:
+            return &relu;
+        case -RELU:
+            return &relu_derivative;
 
-    if (activation == IDENTITY) {
-        return &identity;
-    }
-    if (activation == -IDENTITY) {
-        return &identity_derivative;
-    }
+        case IDENTITY:
+            return &identity;
+        case -IDENTITY:
+            return &identity_derivative;
 
-    if (activation == SIGMOID) {
-        return &sigmoid;
-    }
-    if (activation == -SIGMOID) {
-        return &sigmoid_derivative;
-    }
+        case SIGMOID:
+            return &sigmoid;
+        case -SIGMOID:
+            return &sigmoid_derivative;
+
+        case LEAKY_RELU:
+            return &leaky_relu;
+        case -LEAKY_RELU:
+            return &leaky_relu_derivative;
 
-    if (activation == SOFTMAX) {
-        printf_error("impossible de renvoyer la fonction softmax\n");
-        return NULL;
-    }
-    if (activation == -SOFTMAX) {
-        printf_error("impossible de renvoyer la dérivée de la fonction softmax\n");
-        return NULL;
-    }
+        case TANH:
+            return &tanh_;
+        case -TANH:
+            return &tanh_derivative;
 
-    if (activation == TANH) {
-        return &tanh_;
-    }
-    if (activation == -TANH) {
-        return &tanh_derivative;
-    }
+        case SOFTMAX:
+            printf_error((char*)"impossible de renvoyer la fonction softmax\n");
+            return NULL;
+        case -SOFTMAX:
+            printf_error((char*)"impossible de renvoyer la dérivée de la fonction softmax\n");
+            return NULL;
 
-    if (activation == LEAKY_RELU) {
-        return &leaky_relu;
+        default:
+            printf_error((char*)"fonction d'activation inconnue (get_activation_function): ");
+            printf("%d\n", activation);
+            return NULL;
     }
-    if (activation == -LEAKY_RELU) {
-        return &leaky_relu_derivative;
-    }
-    printf_error("fonction d'activation inconnue (get_activation_function): ");
-    printf("%d\n", activation);
-    return NULL;
 }
+
+
+#ifdef __CUDACC__
+funcPtr get_activation_function_cuda(int activation) {
+    funcPtr host_function;
+
+    switch (activation) {
+        case RELU:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_relu, sizeof(funcPtr)));
+            break;
+        case -RELU:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_relu_derivative, sizeof(funcPtr)));
+            break;
+
+        case IDENTITY:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_identity, sizeof(funcPtr)));
+            break;
+        case -IDENTITY:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_identity_derivative, sizeof(funcPtr)));
+            break;
+
+        case SIGMOID:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_sigmoid, sizeof(funcPtr)));
+            break;
+        case -SIGMOID:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_sigmoid_derivative, sizeof(funcPtr)));
+            break;
+
+        case LEAKY_RELU:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_leaky_relu, sizeof(funcPtr)));
+            break;
+        case -LEAKY_RELU:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_leaky_relu_derivative, sizeof(funcPtr)));
+            break;
+
+        case TANH:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_tanh, sizeof(funcPtr)));
+            break;
+        case -TANH:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_tanh_derivative, sizeof(funcPtr)));
+            break;
+
+        case SOFTMAX:
+            printf_error((char*)"impossible de renvoyer la fonction softmax\n");
+            return NULL;
+        case -SOFTMAX:
+            printf_error((char*)"impossible de renvoyer la dérivée de la fonction softmax\n");
+            return NULL;
+
+        default:
+            printf_error((char*)"fonction d'activation inconnue (get_activation_function_cuda): ");
+            printf("%d\n", activation);
+            return NULL;
+    }
+    return host_function;
+}
+#endif
\ No newline at end of file
diff --git a/src/cnn/function.cu b/src/cnn/function.cu
new file mode 100644
index 0000000..e362e42
--- /dev/null
+++ b/src/cnn/function.cu
@@ -0,0 +1,361 @@
+#include <stdio.h>
+#include <float.h>
+#include <math.h>
+
+#include "../include/colors.h"
+#include "../include/utils.h"
+
+#include "include/function.h"
+
+#define BLOCKSIZE_x 16
+#define BLOCKSIZE_y 8
+#define BLOCKSIZE_z 8
+
+//* Identity
+#ifdef __CUDACC__
+__device__ float device_identity(float x) {
+    return x;
+}
+
+__device__ float device_identity_derivative(float x) {
+    (void)x;
+    return 1;
+}
+#endif
+
+float identity(float x) {
+    return x;
+}
+
+float identity_derivative(float x) {
+    (void)x;
+    return 1;
+}
+
+
+//* Sigmoid
+#ifdef __CUDACC__
+__device__ float device_sigmoid(float x) {
+    return 1/(1 + exp(-x));
+}
+
+__device__ float device_sigmoid_derivative(float x) {
+    float tmp = exp(-x);
+    return tmp/((1+tmp)*(1+tmp));
+}
+#endif
+
+float sigmoid(float x) {
+    return 1/(1 + exp(-x));
+}
+
+float sigmoid_derivative(float x) {
+    float tmp = exp(-x);
+    return tmp/((1+tmp)*(1+tmp));
+}
+
+
+//* RELU
+#ifdef __CUDACC__
+__device__ float device_relu(float x) {
+    return fmaxf(0, fminf(x, RELU_CLIP_VALUE));
+}
+
+__device__ float device_relu_derivative(float x) {
+    if (x > 0)
+        return 1;
+    return 0;
+}
+#endif
+
+float relu(float x) {
+    return fmaxf(0, fminf(x, RELU_CLIP_VALUE));
+}
+
+float relu_derivative(float x) {
+    if (x > 0)
+        return 1;
+    return 0;
+}
+
+
+//* Leaky RELU
+#ifdef __CUDACC__
+__device__ float device_leaky_relu(float x) {
+    if (x>0)
+        return fminf(x, RELU_CLIP_VALUE);
+    return x*LEAKER;
+}
+
+__device__ float device_leaky_relu_derivative(float x) {
+    if (x > 0)
+        return 1;
+    return LEAKER;
+}
+#endif
+
+float leaky_relu(float x) {
+    if (x>0)
+        return fminf(x, RELU_CLIP_VALUE);
+    return x*LEAKER;
+}
+
+float leaky_relu_derivative(float x) {
+    if (x > 0)
+        return 1;
+    return LEAKER;
+}
+
+
+//* Tanh
+#ifdef __CUDACC__
+__device__
+#endif
+float device_tanh_(float x) {
+    return tanh(x);
+}
+
+#ifdef __CUDACC__
+__device__
+#endif
+float device_tanh_derivative(float x) {
+    float a = tanh(x);
+    return 1 - a*a;
+}
+
+float tanh_(float x) {
+    return tanh(x);
+}
+
+float tanh_derivative(float x) {
+    float a = tanh(x);
+    return 1 - a*a;
+}
+
+
+
+
+#ifdef __CUDACC__
+/*
+ * Définition des pointeurs de fonctions pour CUDA
+ * voir https://stackoverflow.com/a/15646771
+*/
+__device__ funcPtr ptr_sigmoid = device_sigmoid;
+__device__ funcPtr ptr_relu = device_relu;
+__device__ funcPtr ptr_leaky_relu = device_leaky_relu;
+__device__ funcPtr ptr_tanh = device_tanh_;
+__device__ funcPtr ptr_identity = device_identity;
+
+__device__ funcPtr ptr_identity_derivative = device_identity_derivative;
+__device__ funcPtr ptr_sigmoid_derivative = device_sigmoid_derivative;
+__device__ funcPtr ptr_relu_derivative = device_relu_derivative;
+__device__ funcPtr ptr_leaky_relu_derivative = device_leaky_relu_derivative;
+__device__ funcPtr ptr_tanh_derivative = device_tanh_derivative;
+#endif
+
+
+
+void apply_softmax_input(float ***input, int depth, int rows, int columns) {
+    float m = -FLT_MAX;
+    float sum=0;
+    for (int i=0; i < depth; i++) {
+        for (int j=0; j < rows; j++) {
+            for (int k=0; k < columns; k++) {
+                m = fmaxf(m, input[i][j][k]);
+            }
+        }
+    }
+    for (int i=0; i < depth; i++) {
+        for (int j=0; j < rows; j++) {
+            for (int k=0; k < columns; k++) {
+                input[i][j][k] = exp(m-input[i][j][k]);
+                sum += input[i][j][k];
+            }
+        }
+    }
+    for (int i=0; i < depth; i++) {
+        for (int j=0; j < rows; j++) {
+            for (int k=0; k < columns; k++) {
+                input[i][j][k] = input[i][j][k]/sum;
+            }
+        }
+    }
+}
+
+
+/*
+* Apply function on input
+*/
+#ifdef __CUDACC__
+__global__ void apply_function_input_kernel(funcPtr f, float*** input, int depth, int rows, int columns) {
+    // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
+    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth
+    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < rows
+    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < columns
+
+    if (idx >= depth || idy >= rows || idz >= columns) {
+        return;
+    }
+
+    input[idx][idy][idz] = (*f)(input[idx][idy][idz]);
+}
+
+
+void apply_function_input_device(int activation, float*** input, int depth, int rows, int columns) {
+    // Make computation
+    dim3 gridSize(i_div_up(depth, BLOCKSIZE_x), i_div_up(rows, BLOCKSIZE_y), i_div_up(columns, BLOCKSIZE_z));
+    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
+
+    funcPtr activation_function = get_activation_function_cuda(activation);
+
+    apply_function_input_kernel<<<gridSize, blockSize>>>(activation_function, input, depth, rows, columns);
+
+    gpuErrchk( cudaPeekAtLastError() );
+    gpuErrchk( cudaDeviceSynchronize() );
+}
+#endif
+
+void apply_function_input_cpu(int activation, float*** input, int depth, int rows, int columns) {
+    funcPtr f = get_activation_function(activation);
+
+    for (int i=0; i < depth; i++) {
+        for (int j=0; j < rows; j++) {
+            for (int k=0; k < columns; k++) {
+                input[i][j][k] = (*f)(input[i][j][k]);
+            }
+        }
+    }
+}
+
+#ifdef __CUDACC__
+extern "C"
+#endif
+void apply_function_input(int activation, float*** input, int depth, int rows, int columns) {
+    #ifndef __CUDACC__
+    apply_function_input_cpu(activation, input, depth, rows, columns);
+    #else
+    apply_function_input_device(activation, input, depth, rows, columns);
+    #endif
+}
+
+void apply_function_to_matrix(int activation, float*** input, int depth, int dim) {
+    if (activation == SOFTMAX) {
+        return apply_softmax_input(input, depth, dim, dim);
+    }
+    if (activation >= 1) { // Exclude negative values (derivative)
+        return apply_function_input(activation, input, depth, dim, dim);
+    }
+    printf_error((char*)"fonction d'activation inconnue (apply_function_to_matrix): ");
+    printf("%d\n", activation);
+}
+
+
+void apply_function_to_vector(int activation, float*** input, int dim) {
+    if (activation == SOFTMAX) {
+        return apply_softmax_input(input, 1, 1, dim);
+    }
+    if (activation >= 1) { // Exclude negative values (derivative)
+        return apply_function_input(activation, input, 1, 1, dim);
+    }
+    printf_error((char*)"fonction d'activation inconnue (apply_function_to_vector): ");
+    printf("%d\n", activation);
+}
+
+
+funcPtr get_activation_function(int activation) {
+    switch (activation) {
+        case RELU:
+            return &relu;
+        case -RELU:
+            return &relu_derivative;
+
+        case IDENTITY:
+            return &identity;
+        case -IDENTITY:
+            return &identity_derivative;
+
+        case SIGMOID:
+            return &sigmoid;
+        case -SIGMOID:
+            return &sigmoid_derivative;
+
+        case LEAKY_RELU:
+            return &leaky_relu;
+        case -LEAKY_RELU:
+            return &leaky_relu_derivative;
+
+        case TANH:
+            return &tanh_;
+        case -TANH:
+            return &tanh_derivative;
+
+        case SOFTMAX:
+            printf_error((char*)"impossible de renvoyer la fonction softmax\n");
+            return NULL;
+        case -SOFTMAX:
+            printf_error((char*)"impossible de renvoyer la dérivée de la fonction softmax\n");
+            return NULL;
+
+        default:
+            printf_error((char*)"fonction d'activation inconnue (get_activation_function): ");
+            printf("%d\n", activation);
+            return NULL;
+    }
+}
+
+
+#ifdef __CUDACC__
+funcPtr get_activation_function_cuda(int activation) {
+    funcPtr host_function;
+
+    switch (activation) {
+        case RELU:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_relu, sizeof(funcPtr)));
+            break;
+        case -RELU:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_relu_derivative, sizeof(funcPtr)));
+            break;
+
+        case IDENTITY:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_identity, sizeof(funcPtr)));
+            break;
+        case -IDENTITY:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_identity_derivative, sizeof(funcPtr)));
+            break;
+
+        case SIGMOID:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_sigmoid, sizeof(funcPtr)));
+            break;
+        case -SIGMOID:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_sigmoid_derivative, sizeof(funcPtr)));
+            break;
+
+        case LEAKY_RELU:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_leaky_relu, sizeof(funcPtr)));
+            break;
+        case -LEAKY_RELU:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_leaky_relu_derivative, sizeof(funcPtr)));
+            break;
+
+        case TANH:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_tanh, sizeof(funcPtr)));
+            break;
+        case -TANH:
+            gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_tanh_derivative, sizeof(funcPtr)));
+            break;
+
+        case SOFTMAX:
+            printf_error((char*)"impossible de renvoyer la fonction softmax\n");
+            return NULL;
+        case -SOFTMAX:
+            printf_error((char*)"impossible de renvoyer la dérivée de la fonction softmax\n");
+            return NULL;
+
+        default:
+            printf_error((char*)"fonction d'activation inconnue (get_activation_function_cuda): ");
+            printf("%d\n", activation);
+            return NULL;
+    }
+    return host_function;
+}
+#endif
\ No newline at end of file
diff --git a/src/cnn/include/backpropagation.h b/src/cnn/include/backpropagation.h
index dea99ff..d44af8e 100644
--- a/src/cnn/include/backpropagation.h
+++ b/src/cnn/include/backpropagation.h
@@ -40,16 +40,16 @@ void backward_max_pooling(float*** input, float*** output, int input_width, int
 /*
 * Transfert les informations d'erreur à travers une couche fully connected
 */
-void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, int size_input, int size_output, ptr d_function, int is_first);
+void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, int size_input, int size_output, funcPtr d_function, int is_first);
 
 /*
 * Transfert les informations d'erreur à travers une couche de linéarisation
 */
-void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, ptr d_function);
+void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, funcPtr d_function);
 
 /*
 * Transfert les informations d'erreur à travers un couche de convolution
 */
-void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, ptr d_function, int is_first);
+void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, funcPtr d_function, int is_first);
 
 #endif
diff --git a/src/cnn/include/function.h b/src/cnn/include/function.h
index c09ccab..4234009 100644
--- a/src/cnn/include/function.h
+++ b/src/cnn/include/function.h
@@ -16,52 +16,132 @@
 #define RELU_CLIP_VALUE 15
 
-typedef float (*ptr)(float);
-typedef ptr (*pm)();
+typedef float (*funcPtr)(float);
 
+//* Identité
+#ifdef __CUDACC__
+__device__ float device_identity(float x);
+__device__ float device_identity_derivative(float x);
+#endif
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float identity(float x);
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float identity_derivative(float x);
 
+//* Sigmoid
+#ifdef __CUDACC__
+__device__ float device_sigmoid(float x);
+__device__ float device_sigmoid_derivative(float x);
+#endif
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float sigmoid(float x);
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float sigmoid_derivative(float x);
 
+//* RELU
+#ifdef __CUDACC__
+__device__ float device_relu(float x);
+__device__ float device_relu_derivative(float x);
+#endif
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float relu(float x);
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float relu_derivative(float x);
 
+//* Leaky RELU
+#ifdef __CUDACC__
+__device__ float device_leaky_relu(float x);
+__device__ float device_leaky_relu_derivative(float x);
+#endif
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float leaky_relu(float x);
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float leaky_relu_derivative(float x);
 
+//* Tanh
+#ifdef __CUDACC__
+__device__ float device_tanh_(float x);
+__device__ float device_tanh_derivative(float x);
+#endif
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float tanh_(float x);
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 float tanh_derivative(float x);
 
+
+#ifdef __CUDACC__
+extern "C"
+#endif
 /*
 * Applique softmax sur input[depth][rows][columns]
 */
 void apply_softmax_input(float ***input, int depth, int rows, int columns);
 
+#ifdef __CUDACC__
+extern "C"
+#endif
 /*
 * Applique la fonction f sur input[depth][rows][columns]
 */
-void apply_function_input(float (*f)(float), float*** input, int depth, int rows, int columns);
+void apply_function_input(int activation, float*** input, int depth, int rows, int columns);
 
+#ifdef __CUDACC__
+extern "C"
+#endif
 /*
 * Applique une fonction d'activation (repérée par son identifiant) à une matrice
 */
 void apply_function_to_matrix(int activation, float*** input, int depth, int dim);
 
+#ifdef __CUDACC__
+extern "C"
+#endif
 /*
 * Applique une fonction d'activation (repérée par son identifiant) à un vecteur
 */
 void apply_function_to_vector(int activation, float*** input, int dim);
 
+#ifdef __CUDACC__
+extern "C"
+#endif
 /*
 * Renvoie la fonction d'activation correspondant à son identifiant (activation)
 */
-ptr get_activation_function(int activation);
+funcPtr get_activation_function(int activation);
+
+/*
+* Récupère un pointeur sur le device vers la fonction d'activation demandée puis le transforme en pointeur sur l'host
+*/
+funcPtr get_activation_function_cuda(int activation);
 
 #endif
\ No newline at end of file
diff --git a/test/cnn_function.cu b/test/cnn_function.cu
new file mode 100644
index 0000000..931a50b
--- /dev/null
+++ b/test/cnn_function.cu
@@ -0,0 +1,60 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "../src/include/memory_management.h"
+#include "../src/cnn/include/function.h"
+#include "../src/include/colors.h"
+
+
+int main() {
+    printf("Initialisation\n");
+    // Initialise values
+    int depth = 10;
+    int rows = 10;
+    int columns = 10;
+
+    float*** input = (float***)nalloc(depth, sizeof(float**));
+    float*** input_initial = (float***)malloc(depth*sizeof(float**));
+    for (int i=0; i < depth; i++) {
+        input[i] = (float**)nalloc(rows, sizeof(float*));
+        input_initial[i] = (float**)malloc(rows*sizeof(float*));
+        for (int j=0; j < rows; j++) {
+            input[i][j] = (float*)nalloc(columns, sizeof(float));
+            input_initial[i][j] = (float*)malloc(columns*sizeof(float));
+            for (int k=0; k < columns; k++) {
+                input[i][j][k] = (float)rand()/RAND_MAX;
+                input_initial[i][j][k] = input[i][j][k];
+            }
+        }
+    }
+    printf(GREEN "OK\n" RESET);
+
+    funcPtr func = get_activation_function(TANH);
+
+    printf("Calcul par CUDA\n");
+    apply_function_input(TANH, input, depth, rows, columns);
+    printf(GREEN "OK\n" RESET);
+
+    printf("Vérification des résultats\n");
+    for (int i=0; i < depth; i++) {
+        for (int j=0; j < rows; j++) {
+            for (int k=0; k < columns; k++) {
+                if (fabs((*func)(input_initial[i][j][k]) - input[i][j][k]) > 1e-6) {
+                    printf_error((char*)"Les résultats ne coincident pas\n");
+                    printf("Différence %e\n", fabs((*func)(input_initial[i][j][k]) - input[i][j][k]));
+                    //exit(1);
+                }
+            }
+            gree(input[i][j]);
+            free(input_initial[i][j]);
+        }
+        gree(input[i]);
+        free(input_initial[i]);
+    }
+    gree(input);
+    free(input_initial);
+
+    printf(GREEN "OK\n" RESET);
+    return 0;
+}
\ No newline at end of file