mirror of
https://github.com/augustin64/projet-tipe
synced 2025-01-23 15:16:26 +01:00
Add function.cu
Translate `apply_function_input` and `get_activation_function` to CUDA
This commit is contained in:
parent
e4003aea28
commit
677de892e7
5
Makefile
5
Makefile
@ -95,7 +95,7 @@ $(BUILDDIR)/cnn-main-cuda: $(BUILDDIR)/cnn_main.cuda.o \
|
||||
$(BUILDDIR)/cnn_initialisation.cuda.o \
|
||||
$(BUILDDIR)/cnn_cuda_make.o \
|
||||
$(BUILDDIR)/cnn_neuron_io.cuda.o \
|
||||
$(BUILDDIR)/cnn_function.cuda.o \
|
||||
$(BUILDDIR)/cnn_cuda_function.o \
|
||||
$(BUILDDIR)/cnn_utils.cuda.o \
|
||||
$(BUILDDIR)/cnn_update.cuda.o \
|
||||
$(BUILDDIR)/cnn_free.cuda.o \
|
||||
@ -223,4 +223,5 @@ $(CACHE_DIR)/mnist-reseau-cnn.bin: $(BUILDDIR)/cnn-main
|
||||
#
|
||||
clean:
|
||||
rm -rf $(BUILDDIR)/*
|
||||
rm -f $(CACHE_DIR)/*
|
||||
|
||||
#rm -f $(CACHE_DIR)/*
|
@ -82,7 +82,7 @@ void backward_max_pooling(float*** input, float*** output, int input_width, int
|
||||
}
|
||||
}
|
||||
|
||||
void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, int size_input, int size_output, ptr d_function, int is_first) {
|
||||
void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, int size_input, int size_output, funcPtr d_function, int is_first) {
|
||||
// Bias
|
||||
for (int j=0; j < size_output; j++) {
|
||||
ker->d_bias[j] += output[j];
|
||||
@ -109,7 +109,7 @@ void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output,
|
||||
}
|
||||
}
|
||||
|
||||
void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, ptr d_function) {
|
||||
void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output,funcPtr d_function) {
|
||||
// Bias
|
||||
for (int j=0; j < size_output; j++) {
|
||||
ker->d_bias[j] += output[j];
|
||||
@ -144,7 +144,7 @@ void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, fl
|
||||
}
|
||||
}
|
||||
|
||||
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, ptr d_function, int is_first) {
|
||||
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, funcPtr d_function, int is_first) {
|
||||
// Bias
|
||||
for (int i=0; i < depth_output; i++) {
|
||||
for (int j=0; j < dim_output; j++) {
|
||||
|
@ -251,10 +251,10 @@ void backward_propagation(Network* network, int wanted_number) {
|
||||
|
||||
|
||||
if (k_i->cnn) { // Convolution
|
||||
ptr d_f = get_activation_function(-activation);
|
||||
funcPtr d_f = get_activation_function(-activation);
|
||||
backward_convolution(k_i->cnn, input, input_z, output, input_depth, input_width, output_depth, output_width, d_f, i==0);
|
||||
} else if (k_i->nn) { // Full connection
|
||||
ptr d_f = get_activation_function(-activation);
|
||||
funcPtr d_f = get_activation_function(-activation);
|
||||
if (k_i->linearisation == DOESNT_LINEARISE) { // Vecteur -> Vecteur
|
||||
backward_dense(k_i->nn, input[0][0], input_z[0][0], output[0][0], input_width, output_width, d_f, i==0);
|
||||
} else { // Matrice -> vecteur
|
||||
|
@ -3,9 +3,26 @@
|
||||
#include <float.h>
|
||||
|
||||
#include "../include/colors.h"
|
||||
#include "../include/utils.h"
|
||||
|
||||
#include "include/function.h"
|
||||
|
||||
#define BLOCKSIZE_x 16
|
||||
#define BLOCKSIZE_y 8
|
||||
#define BLOCKSIZE_z 8
|
||||
|
||||
//* Identity
|
||||
#ifdef __CUDACC__
|
||||
__device__ float device_identity(float x) {
|
||||
return x;
|
||||
}
|
||||
|
||||
__device__ float device_identity_derivative(float x) {
|
||||
(void)x;
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
float identity(float x) {
|
||||
return x;
|
||||
}
|
||||
@ -16,6 +33,18 @@ float identity_derivative(float x) {
|
||||
}
|
||||
|
||||
|
||||
//* Sigmoid
|
||||
#ifdef __CUDACC__
|
||||
__device__ float device_sigmoid(float x) {
|
||||
return 1/(1 + exp(-x));
|
||||
}
|
||||
|
||||
__device__ float device_sigmoid_derivative(float x) {
|
||||
float tmp = exp(-x);
|
||||
return tmp/((1+tmp)*(1+tmp));
|
||||
}
|
||||
#endif
|
||||
|
||||
float sigmoid(float x) {
|
||||
return 1/(1 + exp(-x));
|
||||
}
|
||||
@ -26,6 +55,19 @@ float sigmoid_derivative(float x) {
|
||||
}
|
||||
|
||||
|
||||
//* RELU
|
||||
#ifdef __CUDACC__
|
||||
__device__ float device_relu(float x) {
|
||||
return fmaxf(0, fminf(x, RELU_CLIP_VALUE));
|
||||
}
|
||||
|
||||
__device__ float device_relu_derivative(float x) {
|
||||
if (x > 0)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
float relu(float x) {
|
||||
return fmaxf(0, fminf(x, RELU_CLIP_VALUE));
|
||||
}
|
||||
@ -37,6 +79,21 @@ float relu_derivative(float x) {
|
||||
}
|
||||
|
||||
|
||||
//* Leaky RELU
|
||||
#ifdef __CUDACC__
|
||||
__device__ float device_leaky_relu(float x) {
|
||||
if (x>0)
|
||||
return fminf(x, RELU_CLIP_VALUE);
|
||||
return x*LEAKER;
|
||||
}
|
||||
|
||||
__device__ float device_leaky_relu_derivative(float x) {
|
||||
if (x > 0)
|
||||
return 1;
|
||||
return LEAKER;
|
||||
}
|
||||
#endif
|
||||
|
||||
float leaky_relu(float x) {
|
||||
if (x>0)
|
||||
return fminf(x, RELU_CLIP_VALUE);
|
||||
@ -50,6 +107,22 @@ float leaky_relu_derivative(float x) {
|
||||
}
|
||||
|
||||
|
||||
//* Tanh
|
||||
#ifdef __CUDACC__
|
||||
__device__
|
||||
#endif
|
||||
float device_tanh_(float x) {
|
||||
return tanh(x);
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
__device__
|
||||
#endif
|
||||
float device_tanh_derivative(float x) {
|
||||
float a = tanh(x);
|
||||
return 1 - a*a;
|
||||
}
|
||||
|
||||
float tanh_(float x) {
|
||||
return tanh(x);
|
||||
}
|
||||
@ -60,6 +133,28 @@ float tanh_derivative(float x) {
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#ifdef __CUDACC__
|
||||
/*
|
||||
* Définition des pointeurs de fonctions pour CUDA
|
||||
* voir https://stackoverflow.com/a/15646771
|
||||
*/
|
||||
__device__ funcPtr ptr_sigmoid = device_sigmoid;
|
||||
__device__ funcPtr ptr_relu = device_relu;
|
||||
__device__ funcPtr ptr_leaky_relu = device_leaky_relu;
|
||||
__device__ funcPtr ptr_tanh = device_tanh_;
|
||||
__device__ funcPtr ptr_identity = device_identity;
|
||||
|
||||
__device__ funcPtr ptr_identity_derivative = device_identity_derivative;
|
||||
__device__ funcPtr ptr_sigmoid_derivative = device_sigmoid_derivative;
|
||||
__device__ funcPtr ptr_relu_derivative = device_relu_derivative;
|
||||
__device__ funcPtr ptr_leaky_relu_derivative = device_leaky_relu_derivative;
|
||||
__device__ funcPtr ptr_tanh_derivative = device_tanh_derivative;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
void apply_softmax_input(float ***input, int depth, int rows, int columns) {
|
||||
float m = -FLT_MAX;
|
||||
float sum=0;
|
||||
@ -88,7 +183,41 @@ void apply_softmax_input(float ***input, int depth, int rows, int columns) {
|
||||
}
|
||||
|
||||
|
||||
void apply_function_input(float (*f)(float), float*** input, int depth, int rows, int columns) {
|
||||
/*
|
||||
* Apply function on input
|
||||
*/
|
||||
#ifdef __CUDACC__
|
||||
__global__ void apply_function_input_kernel(funcPtr f, float*** input, int depth, int rows, int columns) {
|
||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth
|
||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < rows
|
||||
int idz = threadIdx.z + blockDim.z*blockIdx.z; // < columns
|
||||
|
||||
if (idx >= depth || idy >= rows || idz >= columns) {
|
||||
return;
|
||||
}
|
||||
|
||||
input[idx][idy][idz] = (*f)(input[idx][idy][idz]);
|
||||
}
|
||||
|
||||
|
||||
void apply_function_input_device(int activation, float*** input, int depth, int rows, int columns) {
|
||||
// Make computation
|
||||
dim3 gridSize(i_div_up(depth, BLOCKSIZE_x), i_div_up(rows, BLOCKSIZE_y), i_div_up(columns, BLOCKSIZE_z));
|
||||
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||
|
||||
funcPtr activation_function = get_activation_function_cuda(activation);
|
||||
|
||||
apply_function_input_kernel<<<gridSize, blockSize>>>(activation_function, input, depth, rows, columns);
|
||||
|
||||
gpuErrchk( cudaPeekAtLastError() );
|
||||
gpuErrchk( cudaDeviceSynchronize() );
|
||||
}
|
||||
#endif
|
||||
|
||||
void apply_function_input_cpu(int activation, float*** input, int depth, int rows, int columns) {
|
||||
funcPtr f = get_activation_function(activation);
|
||||
|
||||
for (int i=0; i < depth; i++) {
|
||||
for (int j=0; j < rows; j++) {
|
||||
for (int k=0; k < columns; k++) {
|
||||
@ -98,15 +227,25 @@ void apply_function_input(float (*f)(float), float*** input, int depth, int rows
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
void apply_function_input(int activation, float*** input, int depth, int rows, int columns) {
|
||||
#ifndef __CUDACC__
|
||||
apply_function_input_cpu(activation, input, depth, rows, columns);
|
||||
#else
|
||||
apply_function_input_device(activation, input, depth, rows, columns);
|
||||
#endif
|
||||
}
|
||||
|
||||
void apply_function_to_matrix(int activation, float*** input, int depth, int dim) {
|
||||
if (activation == SOFTMAX) {
|
||||
return apply_softmax_input(input, depth, dim, dim);
|
||||
}
|
||||
if (activation >= 1) { // Exclude negative values (derivative)
|
||||
ptr f = get_activation_function(activation);
|
||||
return apply_function_input(f, input, depth, dim, dim);
|
||||
return apply_function_input(activation, input, depth, dim, dim);
|
||||
}
|
||||
printf_error("fonction d'activation inconnue (apply_function_to_matrix): ");
|
||||
printf_error((char*)"fonction d'activation inconnue (apply_function_to_matrix): ");
|
||||
printf("%d\n", activation);
|
||||
}
|
||||
|
||||
@ -116,59 +255,107 @@ void apply_function_to_vector(int activation, float*** input, int dim) {
|
||||
return apply_softmax_input(input, 1, 1, dim);
|
||||
}
|
||||
if (activation >= 1) { // Exclude negative values (derivative)
|
||||
ptr f = get_activation_function(activation);
|
||||
return apply_function_input(f, input, 1, 1, dim);
|
||||
return apply_function_input(activation, input, 1, 1, dim);
|
||||
}
|
||||
printf_error("fonction d'activation inconnue (apply_function_to_vector): ");
|
||||
printf_error((char*)"fonction d'activation inconnue (apply_function_to_vector): ");
|
||||
printf("%d\n", activation);
|
||||
}
|
||||
|
||||
|
||||
ptr get_activation_function(int activation) {
|
||||
if (activation == RELU) {
|
||||
return &relu;
|
||||
}
|
||||
if (activation == -RELU) {
|
||||
return &relu_derivative;
|
||||
}
|
||||
funcPtr get_activation_function(int activation) {
|
||||
switch (activation) {
|
||||
case RELU:
|
||||
return &relu;
|
||||
case -RELU:
|
||||
return &relu_derivative;
|
||||
|
||||
if (activation == IDENTITY) {
|
||||
return &identity;
|
||||
}
|
||||
if (activation == -IDENTITY) {
|
||||
return &identity_derivative;
|
||||
}
|
||||
case IDENTITY:
|
||||
return &identity;
|
||||
case -IDENTITY:
|
||||
return &identity_derivative;
|
||||
|
||||
if (activation == SIGMOID) {
|
||||
return &sigmoid;
|
||||
}
|
||||
if (activation == -SIGMOID) {
|
||||
return &sigmoid_derivative;
|
||||
}
|
||||
case SIGMOID:
|
||||
return &sigmoid;
|
||||
case -SIGMOID:
|
||||
return &sigmoid_derivative;
|
||||
|
||||
if (activation == SOFTMAX) {
|
||||
printf_error("impossible de renvoyer la fonction softmax\n");
|
||||
return NULL;
|
||||
}
|
||||
if (activation == -SOFTMAX) {
|
||||
printf_error("impossible de renvoyer la dérivée de la fonction softmax\n");
|
||||
return NULL;
|
||||
}
|
||||
case LEAKY_RELU:
|
||||
return &leaky_relu;
|
||||
case -LEAKY_RELU:
|
||||
return &leaky_relu_derivative;
|
||||
|
||||
if (activation == TANH) {
|
||||
return &tanh_;
|
||||
}
|
||||
if (activation == -TANH) {
|
||||
return &tanh_derivative;
|
||||
}
|
||||
case TANH:
|
||||
return &tanh_;
|
||||
case -TANH:
|
||||
return &tanh_derivative;
|
||||
|
||||
if (activation == LEAKY_RELU) {
|
||||
return &leaky_relu;
|
||||
case SOFTMAX:
|
||||
printf_error((char*)"impossible de renvoyer la fonction softmax\n");
|
||||
return NULL;
|
||||
case -SOFTMAX:
|
||||
printf_error((char*)"impossible de renvoyer la dérivée de la fonction softmax\n");
|
||||
return NULL;
|
||||
|
||||
default:
|
||||
printf_error((char*)"fonction d'activation inconnue (get_activation_function_cuda): ");
|
||||
printf("%d\n", activation);
|
||||
return NULL;
|
||||
}
|
||||
if (activation == -LEAKY_RELU) {
|
||||
return &leaky_relu_derivative;
|
||||
}
|
||||
printf_error("fonction d'activation inconnue (get_activation_function): ");
|
||||
printf("%d\n", activation);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
#ifdef __CUDACC__
|
||||
funcPtr get_activation_function_cuda(int activation) {
|
||||
funcPtr host_function;
|
||||
|
||||
switch (activation) {
|
||||
case RELU:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_relu, sizeof(funcPtr)));
|
||||
break;
|
||||
case -RELU:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_relu_derivative, sizeof(funcPtr)));
|
||||
break;
|
||||
|
||||
case IDENTITY:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_identity, sizeof(funcPtr)));
|
||||
break;
|
||||
case -IDENTITY:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_identity_derivative, sizeof(funcPtr)));
|
||||
break;
|
||||
|
||||
case SIGMOID:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_sigmoid, sizeof(funcPtr)));
|
||||
break;
|
||||
case -SIGMOID:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_sigmoid_derivative, sizeof(funcPtr)));
|
||||
break;
|
||||
|
||||
case LEAKY_RELU:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_leaky_relu, sizeof(funcPtr)));
|
||||
break;
|
||||
case -LEAKY_RELU:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_leaky_relu_derivative, sizeof(funcPtr)));
|
||||
break;
|
||||
|
||||
case TANH:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_tanh, sizeof(funcPtr)));
|
||||
break;
|
||||
case -TANH:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_tanh_derivative, sizeof(funcPtr)));
|
||||
break;
|
||||
|
||||
case SOFTMAX:
|
||||
printf_error((char*)"impossible de renvoyer la fonction softmax\n");
|
||||
return NULL;
|
||||
case -SOFTMAX:
|
||||
printf_error((char*)"impossible de renvoyer la dérivée de la fonction softmax\n");
|
||||
return NULL;
|
||||
|
||||
default:
|
||||
printf_error((char*)"fonction d'activation inconnue (get_activation_function_cuda): ");
|
||||
printf("%d\n", activation);
|
||||
return NULL;
|
||||
}
|
||||
return host_function;
|
||||
}
|
||||
#endif
|
361
src/cnn/function.cu
Normal file
361
src/cnn/function.cu
Normal file
@ -0,0 +1,361 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
|
||||
#include "../include/colors.h"
|
||||
#include "../include/utils.h"
|
||||
|
||||
#include "include/function.h"
|
||||
|
||||
#define BLOCKSIZE_x 16
|
||||
#define BLOCKSIZE_y 8
|
||||
#define BLOCKSIZE_z 8
|
||||
|
||||
//* Identity
|
||||
#ifdef __CUDACC__
|
||||
__device__ float device_identity(float x) {
|
||||
return x;
|
||||
}
|
||||
|
||||
__device__ float device_identity_derivative(float x) {
|
||||
(void)x;
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
float identity(float x) {
|
||||
return x;
|
||||
}
|
||||
|
||||
float identity_derivative(float x) {
|
||||
(void)x;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
//* Sigmoid
|
||||
#ifdef __CUDACC__
|
||||
__device__ float device_sigmoid(float x) {
|
||||
return 1/(1 + exp(-x));
|
||||
}
|
||||
|
||||
__device__ float device_sigmoid_derivative(float x) {
|
||||
float tmp = exp(-x);
|
||||
return tmp/((1+tmp)*(1+tmp));
|
||||
}
|
||||
#endif
|
||||
|
||||
float sigmoid(float x) {
|
||||
return 1/(1 + exp(-x));
|
||||
}
|
||||
|
||||
float sigmoid_derivative(float x) {
|
||||
float tmp = exp(-x);
|
||||
return tmp/((1+tmp)*(1+tmp));
|
||||
}
|
||||
|
||||
|
||||
//* RELU
|
||||
#ifdef __CUDACC__
|
||||
__device__ float device_relu(float x) {
|
||||
return fmaxf(0, fminf(x, RELU_CLIP_VALUE));
|
||||
}
|
||||
|
||||
__device__ float device_relu_derivative(float x) {
|
||||
if (x > 0)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
float relu(float x) {
|
||||
return fmaxf(0, fminf(x, RELU_CLIP_VALUE));
|
||||
}
|
||||
|
||||
float relu_derivative(float x) {
|
||||
if (x > 0)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
//* Leaky RELU
|
||||
#ifdef __CUDACC__
|
||||
__device__ float device_leaky_relu(float x) {
|
||||
if (x>0)
|
||||
return fminf(x, RELU_CLIP_VALUE);
|
||||
return x*LEAKER;
|
||||
}
|
||||
|
||||
__device__ float device_leaky_relu_derivative(float x) {
|
||||
if (x > 0)
|
||||
return 1;
|
||||
return LEAKER;
|
||||
}
|
||||
#endif
|
||||
|
||||
float leaky_relu(float x) {
|
||||
if (x>0)
|
||||
return fminf(x, RELU_CLIP_VALUE);
|
||||
return x*LEAKER;
|
||||
}
|
||||
|
||||
float leaky_relu_derivative(float x) {
|
||||
if (x > 0)
|
||||
return 1;
|
||||
return LEAKER;
|
||||
}
|
||||
|
||||
|
||||
//* Tanh
|
||||
#ifdef __CUDACC__
|
||||
__device__
|
||||
#endif
|
||||
float device_tanh_(float x) {
|
||||
return tanh(x);
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
__device__
|
||||
#endif
|
||||
float device_tanh_derivative(float x) {
|
||||
float a = tanh(x);
|
||||
return 1 - a*a;
|
||||
}
|
||||
|
||||
float tanh_(float x) {
|
||||
return tanh(x);
|
||||
}
|
||||
|
||||
float tanh_derivative(float x) {
|
||||
float a = tanh(x);
|
||||
return 1 - a*a;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#ifdef __CUDACC__
|
||||
/*
|
||||
* Définition des pointeurs de fonctions pour CUDA
|
||||
* voir https://stackoverflow.com/a/15646771
|
||||
*/
|
||||
__device__ funcPtr ptr_sigmoid = device_sigmoid;
|
||||
__device__ funcPtr ptr_relu = device_relu;
|
||||
__device__ funcPtr ptr_leaky_relu = device_leaky_relu;
|
||||
__device__ funcPtr ptr_tanh = device_tanh_;
|
||||
__device__ funcPtr ptr_identity = device_identity;
|
||||
|
||||
__device__ funcPtr ptr_identity_derivative = device_identity_derivative;
|
||||
__device__ funcPtr ptr_sigmoid_derivative = device_sigmoid_derivative;
|
||||
__device__ funcPtr ptr_relu_derivative = device_relu_derivative;
|
||||
__device__ funcPtr ptr_leaky_relu_derivative = device_leaky_relu_derivative;
|
||||
__device__ funcPtr ptr_tanh_derivative = device_tanh_derivative;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
void apply_softmax_input(float ***input, int depth, int rows, int columns) {
|
||||
float m = -FLT_MAX;
|
||||
float sum=0;
|
||||
for (int i=0; i < depth; i++) {
|
||||
for (int j=0; j < rows; j++) {
|
||||
for (int k=0; k < columns; k++) {
|
||||
m = fmaxf(m, input[i][j][k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i=0; i < depth; i++) {
|
||||
for (int j=0; j < rows; j++) {
|
||||
for (int k=0; k < columns; k++) {
|
||||
input[i][j][k] = exp(m-input[i][j][k]);
|
||||
sum += input[i][j][k];
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i=0; i < depth; i++) {
|
||||
for (int j=0; j < rows; j++) {
|
||||
for (int k=0; k < columns; k++) {
|
||||
input[i][j][k] = input[i][j][k]/sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Apply function on input
|
||||
*/
|
||||
#ifdef __CUDACC__
|
||||
__global__ void apply_function_input_kernel(funcPtr f, float*** input, int depth, int rows, int columns) {
|
||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth
|
||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < rows
|
||||
int idz = threadIdx.z + blockDim.z*blockIdx.z; // < columns
|
||||
|
||||
if (idx >= depth || idy >= rows || idz >= columns) {
|
||||
return;
|
||||
}
|
||||
|
||||
input[idx][idy][idz] = (*f)(input[idx][idy][idz]);
|
||||
}
|
||||
|
||||
|
||||
void apply_function_input_device(int activation, float*** input, int depth, int rows, int columns) {
|
||||
// Make computation
|
||||
dim3 gridSize(i_div_up(depth, BLOCKSIZE_x), i_div_up(rows, BLOCKSIZE_y), i_div_up(columns, BLOCKSIZE_z));
|
||||
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||
|
||||
funcPtr activation_function = get_activation_function_cuda(activation);
|
||||
|
||||
apply_function_input_kernel<<<gridSize, blockSize>>>(activation_function, input, depth, rows, columns);
|
||||
|
||||
gpuErrchk( cudaPeekAtLastError() );
|
||||
gpuErrchk( cudaDeviceSynchronize() );
|
||||
}
|
||||
#endif
|
||||
|
||||
void apply_function_input_cpu(int activation, float*** input, int depth, int rows, int columns) {
|
||||
funcPtr f = get_activation_function(activation);
|
||||
|
||||
for (int i=0; i < depth; i++) {
|
||||
for (int j=0; j < rows; j++) {
|
||||
for (int k=0; k < columns; k++) {
|
||||
input[i][j][k] = (*f)(input[i][j][k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
void apply_function_input(int activation, float*** input, int depth, int rows, int columns) {
|
||||
#ifndef __CUDACC__
|
||||
apply_function_input_cpu(activation, input, depth, rows, columns);
|
||||
#else
|
||||
apply_function_input_device(activation, input, depth, rows, columns);
|
||||
#endif
|
||||
}
|
||||
|
||||
void apply_function_to_matrix(int activation, float*** input, int depth, int dim) {
|
||||
if (activation == SOFTMAX) {
|
||||
return apply_softmax_input(input, depth, dim, dim);
|
||||
}
|
||||
if (activation >= 1) { // Exclude negative values (derivative)
|
||||
return apply_function_input(activation, input, depth, dim, dim);
|
||||
}
|
||||
printf_error((char*)"fonction d'activation inconnue (apply_function_to_matrix): ");
|
||||
printf("%d\n", activation);
|
||||
}
|
||||
|
||||
|
||||
void apply_function_to_vector(int activation, float*** input, int dim) {
|
||||
if (activation == SOFTMAX) {
|
||||
return apply_softmax_input(input, 1, 1, dim);
|
||||
}
|
||||
if (activation >= 1) { // Exclude negative values (derivative)
|
||||
return apply_function_input(activation, input, 1, 1, dim);
|
||||
}
|
||||
printf_error((char*)"fonction d'activation inconnue (apply_function_to_vector): ");
|
||||
printf("%d\n", activation);
|
||||
}
|
||||
|
||||
|
||||
funcPtr get_activation_function(int activation) {
|
||||
switch (activation) {
|
||||
case RELU:
|
||||
return &relu;
|
||||
case -RELU:
|
||||
return &relu_derivative;
|
||||
|
||||
case IDENTITY:
|
||||
return &identity;
|
||||
case -IDENTITY:
|
||||
return &identity_derivative;
|
||||
|
||||
case SIGMOID:
|
||||
return &sigmoid;
|
||||
case -SIGMOID:
|
||||
return &sigmoid_derivative;
|
||||
|
||||
case LEAKY_RELU:
|
||||
return &leaky_relu;
|
||||
case -LEAKY_RELU:
|
||||
return &leaky_relu_derivative;
|
||||
|
||||
case TANH:
|
||||
return &tanh_;
|
||||
case -TANH:
|
||||
return &tanh_derivative;
|
||||
|
||||
case SOFTMAX:
|
||||
printf_error((char*)"impossible de renvoyer la fonction softmax\n");
|
||||
return NULL;
|
||||
case -SOFTMAX:
|
||||
printf_error((char*)"impossible de renvoyer la dérivée de la fonction softmax\n");
|
||||
return NULL;
|
||||
|
||||
default:
|
||||
printf_error((char*)"fonction d'activation inconnue (get_activation_function_cuda): ");
|
||||
printf("%d\n", activation);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef __CUDACC__
|
||||
funcPtr get_activation_function_cuda(int activation) {
|
||||
funcPtr host_function;
|
||||
|
||||
switch (activation) {
|
||||
case RELU:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_relu, sizeof(funcPtr)));
|
||||
break;
|
||||
case -RELU:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_relu_derivative, sizeof(funcPtr)));
|
||||
break;
|
||||
|
||||
case IDENTITY:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_identity, sizeof(funcPtr)));
|
||||
break;
|
||||
case -IDENTITY:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_identity_derivative, sizeof(funcPtr)));
|
||||
break;
|
||||
|
||||
case SIGMOID:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_sigmoid, sizeof(funcPtr)));
|
||||
break;
|
||||
case -SIGMOID:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_sigmoid_derivative, sizeof(funcPtr)));
|
||||
break;
|
||||
|
||||
case LEAKY_RELU:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_leaky_relu, sizeof(funcPtr)));
|
||||
break;
|
||||
case -LEAKY_RELU:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_leaky_relu_derivative, sizeof(funcPtr)));
|
||||
break;
|
||||
|
||||
case TANH:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_tanh, sizeof(funcPtr)));
|
||||
break;
|
||||
case -TANH:
|
||||
gpuErrchk( cudaMemcpyFromSymbol(&host_function, ptr_tanh_derivative, sizeof(funcPtr)));
|
||||
break;
|
||||
|
||||
case SOFTMAX:
|
||||
printf_error((char*)"impossible de renvoyer la fonction softmax\n");
|
||||
return NULL;
|
||||
case -SOFTMAX:
|
||||
printf_error((char*)"impossible de renvoyer la dérivée de la fonction softmax\n");
|
||||
return NULL;
|
||||
|
||||
default:
|
||||
printf_error((char*)"fonction d'activation inconnue (get_activation_function_cuda): ");
|
||||
printf("%d\n", activation);
|
||||
return NULL;
|
||||
}
|
||||
return host_function;
|
||||
}
|
||||
#endif
|
@ -40,16 +40,16 @@ void backward_max_pooling(float*** input, float*** output, int input_width, int
|
||||
/*
|
||||
* Transfert les informations d'erreur à travers une couche fully connected
|
||||
*/
|
||||
void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, int size_input, int size_output, ptr d_function, int is_first);
|
||||
void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, int size_input, int size_output, funcPtr d_function, int is_first);
|
||||
|
||||
/*
|
||||
* Transfert les informations d'erreur à travers une couche de linéarisation
|
||||
*/
|
||||
void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, ptr d_function);
|
||||
void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, funcPtr d_function);
|
||||
|
||||
/*
|
||||
* Transfert les informations d'erreur à travers un couche de convolution
|
||||
*/
|
||||
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, ptr d_function, int is_first);
|
||||
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, funcPtr d_function, int is_first);
|
||||
|
||||
#endif
|
||||
|
@ -16,52 +16,132 @@
|
||||
#define RELU_CLIP_VALUE 15
|
||||
|
||||
|
||||
typedef float (*ptr)(float);
|
||||
typedef ptr (*pm)();
|
||||
typedef float (*funcPtr)(float);
|
||||
|
||||
//* Identité
|
||||
#ifdef __CUDACC__
|
||||
__device__ float device_identity(float x);
|
||||
__device__ float device_identity_derivative(float x);
|
||||
#endif
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
float identity(float x);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
float identity_derivative(float x);
|
||||
|
||||
//* Sigmoid
|
||||
#ifdef __CUDACC__
|
||||
__device__ float device_sigmoid(float x);
|
||||
__device__ float device_sigmoid_derivative(float x);
|
||||
#endif
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
float sigmoid(float x);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
float sigmoid_derivative(float x);
|
||||
|
||||
//* RELU
|
||||
#ifdef __CUDACC__
|
||||
__device__ float device_relu(float x);
|
||||
__device__ float device_relu_derivative(float x);
|
||||
#endif
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
float relu(float x);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
float relu_derivative(float x);
|
||||
|
||||
//* Leaky RELU
|
||||
#ifdef __CUDACC__
|
||||
__device__ float device_leaky_relu(float x);
|
||||
__device__ float device_leaky_relu_derivative(float x);
|
||||
#endif
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
float leaky_relu(float x);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
float leaky_relu_derivative(float x);
|
||||
|
||||
//* Tanh
|
||||
#ifdef __CUDACC__
|
||||
__device__ float device_tanh_(float x);
|
||||
__device__ float device_tanh_derivative(float x);
|
||||
#endif
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
float tanh_(float x);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
float tanh_derivative(float x);
|
||||
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
/*
|
||||
* Applique softmax sur input[depth][rows][columns]
|
||||
*/
|
||||
void apply_softmax_input(float ***input, int depth, int rows, int columns);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
/*
|
||||
* Applique la fonction f sur input[depth][rows][columns]
|
||||
*/
|
||||
void apply_function_input(float (*f)(float), float*** input, int depth, int rows, int columns);
|
||||
void apply_function_input(int activation, float*** input, int depth, int rows, int columns);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
/*
|
||||
* Applique une fonction d'activation (repérée par son identifiant) à une matrice
|
||||
*/
|
||||
void apply_function_to_matrix(int activation, float*** input, int depth, int dim);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
/*
|
||||
* Applique une fonction d'activation (repérée par son identifiant) à un vecteur
|
||||
*/
|
||||
void apply_function_to_vector(int activation, float*** input, int dim);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
/*
|
||||
* Renvoie la fonction d'activation correspondant à son identifiant (activation)
|
||||
*/
|
||||
ptr get_activation_function(int activation);
|
||||
funcPtr get_activation_function(int activation);
|
||||
|
||||
/*
|
||||
* Récupère un pointeur sur le device vers la fonction d'activation demandée puis le transforme en pointeur sur l'host
|
||||
*/
|
||||
funcPtr get_activation_function_cuda(int activation);
|
||||
|
||||
#endif
|
60
test/cnn_function.cu
Normal file
60
test/cnn_function.cu
Normal file
@ -0,0 +1,60 @@
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "../src/include/memory_management.h"
|
||||
#include "../src/cnn/include/function.h"
|
||||
#include "../src/include/colors.h"
|
||||
|
||||
|
||||
int main() {
|
||||
printf("Initialisation\n");
|
||||
// Initialise values
|
||||
int depth = 10;
|
||||
int rows = 10;
|
||||
int columns = 10;
|
||||
|
||||
float*** input = (float***)nalloc(depth, sizeof(float**));
|
||||
float*** input_initial = (float***)malloc(depth*sizeof(float**));
|
||||
for (int i=0; i < depth; i++) {
|
||||
input[i] = (float**)nalloc(rows, sizeof(float*));
|
||||
input_initial[i] = (float**)malloc(rows*sizeof(float*));
|
||||
for (int j=0; j < rows; j++) {
|
||||
input[i][j] = (float*)nalloc(columns, sizeof(float));
|
||||
input_initial[i][j] = (float*)malloc(columns*sizeof(float));
|
||||
for (int k=0; k < columns; k++) {
|
||||
input[i][j][k] = rand()/RAND_MAX;
|
||||
input_initial[i][j][k] = input[i][j][k];
|
||||
}
|
||||
}
|
||||
}
|
||||
printf(GREEN "OK\n" RESET);
|
||||
|
||||
funcPtr func = get_activation_function(TANH);
|
||||
|
||||
printf("Calcul par CUDA\n");
|
||||
apply_function_input(TANH, input, depth, rows, columns);
|
||||
printf(GREEN "OK\n" RESET);
|
||||
|
||||
printf("Vérification des résultats\n");
|
||||
for (int i=0; i < depth; i++) {
|
||||
for (int j=0; j < rows; j++) {
|
||||
for (int k=0; k < columns; k++) {
|
||||
if (fabs((*func)(input_initial[i][j][k]) - input[i][j][k]) > 1e-6) {
|
||||
printf_error((char*)"Les résultats ne coincident pas\n");
|
||||
printf("Différence %e\n", fabs((*func)(input_initial[i][j][k]) - input[i][j][k]));
|
||||
//exit(1);
|
||||
}
|
||||
}
|
||||
gree(input[i][j]);
|
||||
free(input_initial[i][j]);
|
||||
}
|
||||
gree(input[i]);
|
||||
free(input_initial[i]);
|
||||
}
|
||||
gree(input);
|
||||
free(input_initial);
|
||||
|
||||
printf(GREEN "OK\n" RESET);
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user