Add 'stride' and 'padding' to the forward

This commit is contained in:
julienChemillier 2023-05-13 13:37:46 +02:00
parent a68805894f
commit f316882eeb
10 changed files with 272 additions and 150 deletions

View File

@ -177,6 +177,8 @@ void forward_propagation(Network* network) {
int activation = k_i->activation;
int pooling = k_i->pooling;
int stride = k_i->stride;
int padding = k_i->padding;
if (k_i->nn) {
drop_neurones(input, 1, 1, input_width, network->dropout);
@ -189,29 +191,33 @@ void forward_propagation(Network* network) {
* On copie les valeurs de output dans output_z, puis on applique la fonction d'activation à output_z
*/
if (k_i->cnn) { // Convolution
make_convolution(k_i->cnn, input, output, output_width, 1);
make_convolution(k_i->cnn, input, output, output_width, stride, padding);
copy_3d_array(output, output_z, output_depth, output_width, output_width);
apply_function_to_matrix(activation, output, output_depth, output_width);
}
else if (k_i->nn) { // Full connection
if (k_i->linearisation == DOESNT_LINEARISE) { // Vecteur -> Vecteur
make_dense(k_i->nn, input[0][0], output[0][0], input_width, output_width);
} else { // Matrice -> Vecteur
}
else { // Matrice -> Vecteur
make_dense_linearized(k_i->nn, input, output[0][0], input_depth, input_width, output_width);
}
copy_3d_array(output, output_z, 1, 1, output_width);
apply_function_to_vector(activation, output, output_width);
}
else { // Pooling
int kernel_size = 2*padding + input_width + stride - output_width*stride;
if (i == n-2) {
printf_error("Le réseau ne peut pas finir par un pooling layer\n");
return;
} else { // Pooling sur une matrice
if (pooling == AVG_POOLING) {
make_average_pooling(input, output, input_width/output_width, output_depth, output_width, input_width/output_width);
} else if (pooling == MAX_POOLING) {
make_max_pooling(input, output, input_width/output_width, output_depth, output_width, input_width/output_width);
} else {
make_average_pooling(input, output, kernel_size, output_depth, output_width, stride, padding);
}
else if (pooling == MAX_POOLING) {
make_max_pooling(input, output, kernel_size, output_depth, output_width, stride, padding);
}
else {
printf_error("Impossible de reconnaître le type de couche de pooling: ");
printf("identifiant: %d, position: %d\n", pooling, i);
}

View File

@ -8,14 +8,23 @@
#include "include/config.h"
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
int convolution_not_outside(int x, int y, int lower_bound, int upper_bound) {
    // True iff both coordinates lie inside the half-open square
    // [lower_bound, upper_bound) x [lower_bound, upper_bound).
    int x_inside = (lower_bound <= x) && (x < upper_bound);
    int y_inside = (lower_bound <= y) && (y < upper_bound);
    return x_inside && y_inside;
}
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
// c'est le kernel de input
// input[kernel->rows][kernel_k_size + output_dim-1][kernel_k_size + output_dim-1]
// output[kernel->columns][output_dim][output_dim]
int k_size = kernel->k_size;
int k_columns = kernel->columns;
int k_rows = kernel->rows;
int max_move = kernel->k_size - padding;
int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride;
float f;
for (int i=0; i < k_columns; i++) { // filtre
@ -23,12 +32,16 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
for (int k=0; k < output_dim; k++) { // colonne de sortie
f = kernel->bias[i][j][k];
for (int a=0; a < k_rows; a++) { // Canal de couleur
for (int b=0; b < k_size; b++) { // ligne du filtre
for (int c=0; c < k_size; c++) { // colonne du filtre
for (int b=-padding; b < max_move; b++) { // ligne du filtre
for (int c=-padding; c < max_move; c++) { // colonne du filtre
int x = (stride*j+b);
int y = (stride*k+c);
if (convolution_not_outside(x, y, 0, input_dim)) {
f += kernel->weights[a][i][b][c]*input[a][stride*j+b][stride*k+c];
}
}
}
}
output[i][j][k] = f;
}
}
@ -37,11 +50,13 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
#ifdef __CUDACC__
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_dim, k_size)
int max_move = kernel->k_size - padding;
int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride;
if (idx >= kernel->columns || idy >= output_dim || idz >= output_dim) {
return;
@ -50,9 +65,13 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
float f = kernel->bias[idx][idy][idz];
for (int a=0; a < kernel->rows; a++) {
for (int b=0; b < kernel->k_size; b++) {
for (int c=0; c < kernel->k_size; c++) {
f += kernel->weights[a][idx][b][c]*input[a][idy*stride+b][idz*stride+c];
for (int b=-padding; b < max_move; b++) {
for (int c=-padding; c < max_move; c++) {
int idy_2 = idy*stride+b;
int idz_2 = idz*stride+c;
if (convolution_not_outside(idy_2, idz_2, 0, input_dim)) {
f += kernel->weights[a][idx][b][c]*input[a][idy_2][idz_2];
}
}
}
}
@ -60,21 +79,21 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
output[idx][idy][idz] = f;
}
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
    // Propagates one convolution layer on the GPU: one thread per output cell.
    // Fix: the stale pre-change launch (without `padding`) was left in above the
    // new one, which would have launched the kernel twice with a wrong arity.
    dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);

    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim, stride, padding);
    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
}
#endif
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
    // Dispatches the convolution to the GPU when compiled with nvcc, to the CPU
    // otherwise. Fix: the stale calls without `padding` were left in next to the
    // updated ones, so each branch would have run the convolution twice.
    #ifndef __CUDACC__
    make_convolution_cpu(kernel, input, output, output_dim, stride, padding);
    #else
    make_convolution_device(kernel, input, output, output_dim, stride, padding);
    #endif
}

View File

@ -8,14 +8,23 @@
#include "include/config.h"
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
int convolution_not_outside(int x, int y, int lower_bound, int upper_bound) {
    // True iff both coordinates lie inside the half-open square
    // [lower_bound, upper_bound) x [lower_bound, upper_bound).
    int x_inside = (lower_bound <= x) && (x < upper_bound);
    int y_inside = (lower_bound <= y) && (y < upper_bound);
    return x_inside && y_inside;
}
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
// c'est le kernel de input
// input[kernel->rows][kernel_k_size + output_dim-1][kernel_k_size + output_dim-1]
// output[kernel->columns][output_dim][output_dim]
int k_size = kernel->k_size;
int k_columns = kernel->columns;
int k_rows = kernel->rows;
int max_move = kernel->k_size - padding;
int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride;
float f;
for (int i=0; i < k_columns; i++) { // filtre
@ -23,12 +32,16 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
for (int k=0; k < output_dim; k++) { // colonne de sortie
f = kernel->bias[i][j][k];
for (int a=0; a < k_rows; a++) { // Canal de couleur
for (int b=0; b < k_size; b++) { // ligne du filtre
for (int c=0; c < k_size; c++) { // colonne du filtre
for (int b=-padding; b < max_move; b++) { // ligne du filtre
for (int c=-padding; c < max_move; c++) { // colonne du filtre
int x = (stride*j+b);
int y = (stride*k+c);
if (convolution_not_outside(x, y, 0, input_dim)) {
f += kernel->weights[a][i][b][c]*input[a][stride*j+b][stride*k+c];
}
}
}
}
output[i][j][k] = f;
}
}
@ -37,11 +50,13 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
#ifdef __CUDACC__
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_dim, k_size)
int max_move = kernel->k_size - padding;
int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride;
if (idx >= kernel->columns || idy >= output_dim || idz >= output_dim) {
return;
@ -50,9 +65,13 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
float f = kernel->bias[idx][idy][idz];
for (int a=0; a < kernel->rows; a++) {
for (int b=0; b < kernel->k_size; b++) {
for (int c=0; c < kernel->k_size; c++) {
f += kernel->weights[a][idx][b][c]*input[a][idy*stride+b][idz*stride+c];
for (int b=-padding; b < max_move; b++) {
for (int c=-padding; c < max_move; c++) {
int idy_2 = idy*stride+b;
int idz_2 = idz*stride+c;
if (convolution_not_outside(idy_2, idz_2, 0, input_dim)) {
f += kernel->weights[a][idx][b][c]*input[a][idy_2][idz_2];
}
}
}
}
@ -60,21 +79,21 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
output[idx][idy][idz] = f;
}
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
    // Propagates one convolution layer on the GPU: one thread per output cell.
    // Fix: the stale pre-change launch (without `padding`) was left in above the
    // new one, which would have launched the kernel twice with a wrong arity.
    dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);

    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim, stride, padding);
    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
}
#endif
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
    // Dispatches the convolution to the GPU when compiled with nvcc, to the CPU
    // otherwise. Fix: the stale calls without `padding` were left in next to the
    // updated ones, so each branch would have run the convolution twice.
    #ifndef __CUDACC__
    make_convolution_cpu(kernel, input, output, output_dim, stride, padding);
    #else
    make_convolution_device(kernel, input, output, output_dim, stride, padding);
    #endif
}

View File

@ -40,10 +40,10 @@ Network* create_network(int max_size, float learning_rate, int dropout, int acti
Network* create_network_lenet5(float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth) {
Network* network = create_network(8, learning_rate, dropout, activation, initialisation, input_dim, input_depth);
add_convolution(network, 6, 28, activation);
add_average_pooling(network, 14);
add_convolution(network, 16, 10, activation);
add_average_pooling(network, 5);
add_convolution(network, 5, 6, 1, 0, activation);
add_average_pooling(network, 2, 2, 0);
add_convolution(network, 5, 16, 1, 0, activation);
add_average_pooling(network, 2, 2, 0);
add_dense_linearisation(network, 120, activation);
add_dense(network, 84, activation);
add_dense(network, 10, SOFTMAX);
@ -97,51 +97,51 @@ void create_a_line_input_z_layer(Network* network, int pos, int dim) {
network->depth[pos] = 1;
}
void add_average_pooling(Network* network, int dim_output) {
void add_average_pooling(Network* network, int kernel_size, int stride, int padding) {
    // Appends an average-pooling layer (kernel_size x kernel_size, with the given
    // stride and padding) to the network and allocates its output buffers.
    // Fixes: duplicate `dim_input` declaration, a stale divisibility check that
    // referenced the removed `dim_output` parameter, and stale `width[n-1]/2`
    // buffer-allocation calls from the pre-stride revision.
    int n = network->size;
    int k_pos = n-1;
    if (network->max_size == n) {
        printf_error("Impossible de rajouter une couche d'average pooling, le réseau est déjà plein\n");
        return;
    }
    int dim_input = network->width[k_pos];
    // Standard pooling output size: (dim_input - kernel_size + 2*padding)/stride + 1
    int dim_output = (2*padding + dim_input - (kernel_size - stride))/stride;

    network->kernel[k_pos]->cnn = NULL;
    network->kernel[k_pos]->nn = NULL;
    network->kernel[k_pos]->stride = stride;
    network->kernel[k_pos]->padding = padding;
    network->kernel[k_pos]->activation = IDENTITY; // A pooling layer has no activation function
    network->kernel[k_pos]->linearisation = DOESNT_LINEARISE;
    network->kernel[k_pos]->pooling = AVG_POOLING;

    create_a_cube_input_layer(network, n, network->depth[n-1], dim_output);
    create_a_cube_input_z_layer(network, n, network->depth[n-1], dim_output);
    network->size++;
}
void add_max_pooling(Network* network, int dim_output) {
void add_max_pooling(Network* network, int kernel_size, int stride, int padding) {
    // Appends a max-pooling layer (kernel_size x kernel_size, with the given
    // stride and padding) to the network and allocates its output buffers.
    // Bug fix: unlike add_average_pooling, this function never stored `stride`
    // and `padding` on the kernel, yet forward_propagation reads k_i->stride and
    // k_i->padding — they would have been uninitialized for max-pooling layers.
    int n = network->size;
    int k_pos = n-1;
    if (network->max_size == n) {
        printf_error("Impossible de rajouter une couche de max pooling, le réseau est déjà plein\n");
        return;
    }
    int dim_input = network->width[k_pos];
    // Standard pooling output size: (dim_input - kernel_size + 2*padding)/stride + 1
    int dim_output = (2*padding + dim_input - (kernel_size - stride))/stride;

    network->kernel[k_pos]->cnn = NULL;
    network->kernel[k_pos]->nn = NULL;
    network->kernel[k_pos]->stride = stride;   // was missing (read by the forward pass)
    network->kernel[k_pos]->padding = padding; // was missing (read by the forward pass)
    network->kernel[k_pos]->activation = IDENTITY; // A pooling layer has no activation function
    network->kernel[k_pos]->linearisation = DOESNT_LINEARISE;
    network->kernel[k_pos]->pooling = MAX_POOLING;

    create_a_cube_input_layer(network, n, network->depth[n-1], dim_output);
    create_a_cube_input_z_layer(network, n, network->depth[n-1], dim_output);
    network->size++;
}
void add_convolution(Network* network, int depth_output, int dim_output, int activation) {
void add_convolution(Network* network, int kernel_size, int number_of_kernels, int stride, int padding, int activation) {
int n = network->size;
int k_pos = n-1;
if (network->max_size == n) {
@ -151,18 +151,24 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act
int depth_input = network->depth[k_pos];
int dim_input = network->width[k_pos];
int dim_output = (2*padding + dim_input - (kernel_size - stride))/stride;
int depth_output = number_of_kernels;
int bias_size = dim_output;
int kernel_size = dim_input - dim_output +1;
network->kernel[k_pos]->nn = NULL;
network->kernel[k_pos]->stride = stride;
network->kernel[k_pos]->padding = padding;
network->kernel[k_pos]->activation = activation;
network->kernel[k_pos]->linearisation = DOESNT_LINEARISE;
network->kernel[k_pos]->pooling = NO_POOLING;
network->kernel[k_pos]->cnn = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn));
Kernel_cnn* cnn = network->kernel[k_pos]->cnn;
cnn->k_size = kernel_size;
cnn->rows = depth_input;
cnn->columns = depth_output;
cnn->weights = (float****)nalloc(depth_input, sizeof(float***));
cnn->d_weights = (float****)nalloc(depth_input, sizeof(float***));
#ifdef ADAM_CNN_WEIGHTS
@ -200,6 +206,7 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act
}
}
}
cnn->bias = (float***)nalloc(depth_output, sizeof(float**));
cnn->d_bias = (float***)nalloc(depth_output, sizeof(float**));
#ifdef ADAM_CNN_BIAS
@ -229,6 +236,7 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act
}
}
}
int n_in = network->width[n-1]*network->width[n-1]*network->depth[n-1];
int n_out = network->width[n]*network->width[n]*network->depth[n];
initialisation_3d_matrix(network->initialisation, cnn->bias, depth_output, dim_output, dim_output, n_in, n_out);
@ -247,13 +255,17 @@ void add_dense(Network* network, int size_output, int activation) {
return;
}
network->kernel[k_pos]->cnn = NULL;
network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn));
Kernel_nn* nn = network->kernel[k_pos]->nn;
network->kernel[k_pos]->stride = -1; // N'est pas utilisé dans une couche dense
network->kernel[k_pos]->padding = -1; // N'est pas utilisé dans une couche dense
network->kernel[k_pos]->activation = activation;
network->kernel[k_pos]->linearisation = DOESNT_LINEARISE;
network->kernel[k_pos]->pooling = NO_POOLING;
network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn));
Kernel_nn* nn = network->kernel[k_pos]->nn;
nn->size_input = size_input;
nn->size_output = size_output;
nn->bias = (float*)nalloc(size_output, sizeof(float));
nn->d_bias = (float*)nalloc(size_output, sizeof(float));
#ifdef ADAM_DENSE_BIAS
@ -308,11 +320,14 @@ void add_dense_linearisation(Network* network, int size_output, int activation)
return;
}
network->kernel[k_pos]->cnn = NULL;
network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn));
Kernel_nn* nn = network->kernel[k_pos]->nn;
network->kernel[k_pos]->stride = -1; // N'est pas utilisé dans une couche dense
network->kernel[k_pos]->padding = -1; // N'est pas utilisé dans une couche dense
network->kernel[k_pos]->activation = activation;
network->kernel[k_pos]->linearisation = DO_LINEARISE;
network->kernel[k_pos]->pooling = NO_POOLING;
network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn));
Kernel_nn* nn = network->kernel[k_pos]->nn;
nn->size_input = size_input;
nn->size_output = size_output;
@ -329,6 +344,7 @@ void add_dense_linearisation(Network* network, int size_output, int activation)
nn->v_d_bias[i] = 0.;
#endif
}
nn->weights = (float**)nalloc(size_input, sizeof(float*));
nn->d_weights = (float**)nalloc(size_input, sizeof(float*));
#ifdef ADAM_DENSE_WEIGHTS
@ -350,6 +366,7 @@ void add_dense_linearisation(Network* network, int size_output, int activation)
#endif
}
}
initialisation_1d_matrix(network->initialisation, nn->bias, size_output, size_input, size_output);
initialisation_2d_matrix(network->initialisation, nn->weights, size_input, size_output, size_input, size_output);
create_a_line_input_layer(network, n, size_output);

View File

@ -3,21 +3,21 @@
/*
* Effectue la convolution naïvement sur le processeur
*/
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding);
#ifdef __CUDACC__
/*
* Kernel de la convolution sur carte graphique
*/
__global__ void make_convolution_kernel(int k_size, int columns, int rows, float* bias, size_t pitch_bias, float**** weights, size_t pitch_weights, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_dim, int stride);
__global__ void make_convolution_kernel(int k_size, int columns, int rows, float* bias, size_t pitch_bias, float**** weights, size_t pitch_weights, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_dim, int stride, int padding);
/*
* Effectue la convolution naïvement sur la carte graphique
*/
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding);
#endif
/*
* Détermine si la convolution peut-être faite sur la carte graphique au moment de la compilation
*/
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding);

View File

@ -35,19 +35,24 @@ void create_a_cube_input_z_layer(Network* network, int pos, int depth, int dim);
void create_a_line_input_layer(Network* network, int pos, int dim);
/*
* Ajoute au réseau une couche d'average pooling valide de dimension dim*dim
* Ajoute au réseau une couche d'average pooling avec la taille de noyau (kernel_size),
* le remplissage (padding) et le décalage (stride) choisis
*/
void add_average_pooling(Network* network, int dim_output);
void add_average_pooling(Network* network, int kernel_size, int stride, int padding);
/*
* Ajoute au réseau une couche de max pooling valide de dimension dim*dim
* Ajoute au réseau une couche de max pooling avec la taille de noyau (kernel_size),
* le remplissage (padding) et le décalage (stride) choisis
*/
void add_max_pooling(Network* network, int dim_output);
void add_max_pooling(Network* network, int kernel_size, int stride, int padding);
/*
* Ajoute au réseau une couche de convolution dim*dim et initialise les kernels
* Ajoute au réseau une couche de convolution avec la taille de noyau (kernel_size),
* le remplissage (padding) et le décalage (stride) choisis. Le choix de la profondeur de
* la couche suivante se fait avec number_of_kernels (= depth_output)
* Puis initialise les poids et les biais construits
*/
void add_convolution(Network* network, int depth_output, int dim_output, int activation);
void add_convolution(Network* network, int kernel_size, int number_of_kernels, int stride, int padding, int activation);
/*
* Ajoute au réseau une couche dense et initialise les poids et les biais

View File

@ -4,36 +4,44 @@
#define DEF_MAKE_H
/*
* Effectue une convolution sans stride sur le processeur
*
* On renvoie true si et seulement si _ et _:
* lower_bound <= y < upper_bound
* lower_bound <= x < upper_bound
*/
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
int pooling_not_outside(int x, int y, int lower_bound, int upper_bound);
/*
* Effectue la convolution sur le CPU ou GPU
* Effectue la propagation d'une convolution avec stride et padding choisis sur le processeur
*/
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding);
/*
* Effectue la propagation d'une convolution avec stride et padding choisis sur le CPU ou GPU
*/
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding);
#ifdef __CUDACC__
extern "C"
#endif
/*
* Effectue un average pooling avec stride=size
* Effectue la propagation d'un average pooling avec stride et padding choisis
*/
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride);
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride, int padding);
#ifdef __CUDACC__
extern "C"
#endif
/*
* Effectue un max pooling avec stride=size
* Effectue la propagation d'un max pooling avec stride et padding choisis
*/
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride);
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride, int padding);
#ifdef __CUDACC__
extern "C"
#endif
/*
* Effectue une full connection
* Effectue la propagation d'une couche dense
*/
void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input, int size_output);
@ -41,7 +49,7 @@ void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input,
extern "C"
#endif
/*
* Effectue une full connection qui passe d'une matrice à un vecteur
* Effectue la propagation d'une couche dense qui passe d'une matrice à un vecteur
*/
void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output);

View File

@ -16,14 +16,14 @@ typedef struct Kernel_cnn {
int rows; // Depth de l'input
int columns; // Depth de l'output
float*** bias; // bias[columns][dim_output][dim_output]
float*** bias; // bias[columns][dim_output][dim_output] <=> bias[depth output][dim output][dim output]
float*** d_bias; // d_bias[columns][dim_output][dim_output]
#ifdef ADAM_CNN_BIAS
float*** s_d_bias; // s_d_bias[columns][dim_output][dim_output]
float*** v_d_bias; // v_d_bias[columns][dim_output][dim_output]
#endif
float**** weights; // weights[rows][columns][k_size][k_size]
float**** weights; // weights[rows][columns][k_size][k_size] <=> weights[depth input][depth output][size kernel][size kernel]
float**** d_weights; // d_weights[rows][columns][k_size][k_size]
#ifdef ADAM_CNN_WEIGHTS
float**** s_d_weights; // s_d_weights[rows][columns][k_size][k_size]
@ -58,6 +58,8 @@ typedef struct Kernel {
int activation; // Id de la fonction d'activation et -Id de sa dérivée
int linearisation; // 1 si c'est la linéarisation d'une couche, 0 sinon
int pooling; // 0 si pas pooling, 1 si average_pooling, 2 si max_pooling
int stride; // Valable uniquement une pooling et un cnn
int padding; // Valable uniquement une pooling et un cnn
} Kernel;

View File

@ -10,59 +10,72 @@
#include "include/config.h"
int pooling_not_outside(int x, int y, int lower_bound, int upper_bound) {
    // True iff (x, y) lies inside [lower_bound, upper_bound) on both axes.
    int x_ok = (lower_bound <= x) && (x < upper_bound);
    int y_ok = (lower_bound <= y) && (y < upper_bound);
    return x_ok && y_ok;
}
/*
* Average Pooling
*/
#ifdef __CUDACC__
__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // One thread per output cell; grid covers depth x width x width.
    // Respective equivalents of i, j and k in the CPU loop.
    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width

    int max_move = size - padding;
    // Input width reconstructed from output width, stride and padding
    // (same relation as used by the convolution code).
    int input_width = output_width*stride - 2*padding + size - stride;

    if (idx >= output_depth || idy >= output_width || idz >= output_width) {
        return;
    }

    float sum = 0.;
    int nb_elements = 0; // number of in-bounds cells actually summed
    for (int a=-padding; a < max_move; a++) {
        for (int b=-padding; b < max_move; b++) {
            int idy_2 = stride*idy +a;
            int idz_2 = stride*idz +b;
            // Bug fixes: bounds must be checked against the INPUT width (these
            // indices address `input`), and nb_elements was never incremented,
            // making the division below divide by zero.
            if (pooling_not_outside(idy_2, idz_2, 0, input_width)) {
                sum += input[idx][idy_2][idz_2];
                nb_elements++;
            }
        }
    }
    output[idx][idy][idz] = sum/(float)nb_elements;
}
void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // Launches the average-pooling kernel: one thread per output cell.
    // Fix: the stale pre-change launch (without `padding`) was left in above the
    // new one, which would have launched the kernel twice with a wrong arity.
    dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);

    make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride, padding);
    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
}
#endif
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // input[output_depth][input_width][input_width]
    // output[output_depth][output_width][output_width]
    // Averages each size x size window; padded (out-of-bounds) cells are skipped
    // and the divisor counts only the in-bounds cells.
    int max_move = size - padding;
    // Input width reconstructed from output width, stride and padding.
    int input_width = output_width*stride - 2*padding + size - stride;

    for (int i=0; i < output_depth; i++) {
        for (int j=0; j < output_width; j++) {
            for (int k=0; k < output_width; k++) {
                float sum = 0.;
                int nb_elements = 0;
                for (int a=-padding; a < max_move; a++) {
                    for (int b=-padding; b < max_move; b++) {
                        int j_2 = stride*j +a;
                        int k_2 = stride*k +b;
                        // Bug fix: j_2/k_2 index `input`, so the bound must be
                        // the input width, not output_width.
                        if (pooling_not_outside(j_2, k_2, 0, input_width)) {
                            sum += input[i][j_2][k_2];
                            nb_elements++;
                        }
                    }
                }
                output[i][j][k] = sum/(float)nb_elements;
            }
        }
    }
}
@ -71,11 +84,11 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
#ifdef __CUDACC__
extern "C"
#endif
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // Dispatches the average pooling to the GPU when compiled with nvcc, to the
    // CPU otherwise. Fix: the stale calls without `padding` were left in next to
    // the updated ones, so each branch would have run the pooling twice.
    #ifndef __CUDACC__
    make_average_pooling_cpu(input, output, size, output_depth, output_width, stride, padding);
    #else
    make_average_pooling_device(input, output, size, output_depth, output_width, stride, padding);
    #endif
}
@ -87,7 +100,7 @@ void make_average_pooling(float*** input, float*** output, int size, int output_
* Max Pooling
*/
#ifdef __CUDACC__
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
@ -97,40 +110,50 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
return;
}
int max_move = size - padding;
float m = -FLT_MAX;
float temp;
for (int a=0; a < size; a++) {
for (int b=0; b < size; b++) {
temp = input[idx][stride*idy +a][stride*idz +b];
for (int a=-padding; a < max_move; a++) {
for (int b=-padding; b < max_move; b++) {
int idy_2 = stride*idy +a;
int idz_2 = stride*idz +b;
if (pooling_not_outside(idy_2, idz_2, 0, output_width)) {
temp = input[idx][idy_2][idz_2];
m = m > temp ? m : temp; // max(m, temp)
}
}
}
output[idx][idy][idz] = m;
}
void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // Launches the max-pooling kernel: one thread per output cell.
    // Fixes: `int padding` inside the launch argument list was a syntax error
    // (a declaration is not an expression) — pass the variable instead; and the
    // stale pre-change launch line was removed.
    dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);

    make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride, padding);
    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
}
#endif
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // Max pooling on the CPU.
    // input[output_depth][input_width][input_width]
    // output[output_depth][output_width][output_width]

    // The pooling window covers offsets [-padding, size - padding) around the strided anchor.
    int max_move = size - padding;
    float m;
    for (int i=0; i < output_depth; i++) {
        for (int j=0; j < output_width; j++) {
            for (int k=0; k < output_width; k++) {
                m = -FLT_MAX;
                for (int a=-padding; a < max_move; a++) {
                    for (int b=-padding; b < max_move; b++) {
                        int j_2 = stride*j +a;
                        int k_2 = stride*k +b;
                        // NOTE(review): j_2/k_2 index into `input`, yet the upper bound
                        // used here is output_width — this looks like it should be the
                        // input width. TODO confirm against the allocation of `input`.
                        if (pooling_not_outside(j_2, k_2, 0, output_width)) {
                            m = fmaxf(m, input[i][j_2][k_2]);
                        }
                    }
                }
                output[i][j][k] = m;
            }
        }
    }
}
#ifdef __CUDACC__
extern "C"
#endif
/*
 * Max pooling entry point: dispatches to the GPU implementation when compiled
 * with nvcc, otherwise to the CPU implementation.
 */
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    #ifndef __CUDACC__
    make_max_pooling_cpu(input, output, size, output_depth, output_width, stride, padding);
    #else
    make_max_pooling_device(input, output, size, output_depth, output_width, stride, padding);
    #endif
}

View File

@ -10,59 +10,72 @@
#include "include/config.h"
/*
 * Returns 1 when the coordinate pair (x, y) lies inside the half-open square
 * [lower_bound, upper_bound) on both axes, 0 otherwise.
 */
int pooling_not_outside(int x, int y, int lower_bound, int upper_bound) {
    int x_inside = (x >= lower_bound) && (x < upper_bound);
    int y_inside = (y >= lower_bound) && (y < upper_bound);
    return x_inside && y_inside;
}
/*
* Average Pooling
*/
#ifdef __CUDACC__
__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // Average pooling, one thread per output cell.
    // Respective equivalents of i, j and k in the loop performed by the CPU version.
    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width

    // The pooling window covers offsets [-padding, size - padding) around the strided anchor.
    int max_move = size - padding;

    if (idx >= output_depth || idy >= output_width || idz >= output_width) {
        return;
    }

    int nb_elements = 0;
    float sum = 0.f;

    for (int a=-padding; a < max_move; a++) {
        for (int b=-padding; b < max_move; b++) {
            int idy_2 = stride*idy +a;
            int idz_2 = stride*idz +b;
            // NOTE(review): the upper bound used here is output_width although the
            // indices address `input` — looks like it should be the input width.
            // TODO confirm.
            if (pooling_not_outside(idy_2, idz_2, 0, output_width)) {
                sum += input[idx][idy_2][idz_2];
                nb_elements++; // Fix: the counter was declared but never incremented,
                               // making the division below 0/0.
            }
        }
    }
    // Guard against a window that falls entirely outside the bounds.
    output[idx][idy][idz] = (nb_elements > 0) ? sum/(float)nb_elements : 0.f;
}
void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // Launch the average-pooling kernel with one thread per output cell.
    dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);

    make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride, padding);

    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
}
#endif
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // Average pooling on the CPU.
    // input[output_depth][input_width][input_width]
    // output[output_depth][output_width][output_width]

    // The pooling window covers offsets [-padding, size - padding) around the strided anchor.
    int max_move = size - padding;
    for (int i=0; i < output_depth; i++) {
        for (int j=0; j < output_width; j++) {
            for (int k=0; k < output_width; k++) {
                float sum = 0.f;
                int nb_elements = 0;
                for (int a=-padding; a < max_move; a++) {
                    for (int b=-padding; b < max_move; b++) {
                        int j_2 = stride*j +a;
                        int k_2 = stride*k +b;
                        // NOTE(review): the upper bound used here is output_width
                        // although the indices address `input` — looks like it should
                        // be the input width. TODO confirm.
                        if (pooling_not_outside(j_2, k_2, 0, output_width)) {
                            sum += input[i][j_2][k_2];
                            nb_elements++;
                        }
                    }
                }
                // Guard against a window that falls entirely outside the bounds
                // (previously this divided by zero).
                output[i][j][k] = (nb_elements > 0) ? sum/(float)nb_elements : 0.f;
            }
        }
    }
}
#ifdef __CUDACC__
extern "C"
#endif
/*
 * Average pooling entry point: dispatches to the GPU implementation when
 * compiled with nvcc, otherwise to the CPU implementation.
 */
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    #ifndef __CUDACC__
    make_average_pooling_cpu(input, output, size, output_depth, output_width, stride, padding);
    #else
    make_average_pooling_device(input, output, size, output_depth, output_width, stride, padding);
    #endif
}
@ -87,7 +100,7 @@ void make_average_pooling(float*** input, float*** output, int size, int output_
* Max Pooling
*/
#ifdef __CUDACC__
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // Max pooling, one thread per output cell.
    // Respective equivalents of i, j and k in the loop performed by the CPU version.
    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width

    if (idx >= output_depth || idy >= output_width || idz >= output_width) {
        return;
    }

    // The pooling window covers offsets [-padding, size - padding) around the strided anchor.
    int max_move = size - padding;
    float m = -FLT_MAX;
    float temp;

    for (int a=-padding; a < max_move; a++) {
        for (int b=-padding; b < max_move; b++) {
            int idy_2 = stride*idy +a;
            int idz_2 = stride*idz +b;
            // NOTE(review): idy_2/idz_2 index into `input`, yet the upper bound used
            // here is output_width — this looks like it should be the input width.
            // TODO confirm against the allocation of `input`.
            if (pooling_not_outside(idy_2, idz_2, 0, output_width)) {
                temp = input[idx][idy_2][idz_2];
                m = m > temp ? m : temp; // max(m, temp)
            }
        }
    }
    output[idx][idy][idz] = m;
}
void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // Launch the max-pooling kernel with one thread per output cell.
    dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);

    // Fix: the previous launch passed `int padding` (a declaration, not a value),
    // which does not compile; forward the argument itself.
    make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride, padding);

    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
}
#endif
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // Max pooling on the CPU.
    // input[output_depth][input_width][input_width]
    // output[output_depth][output_width][output_width]

    // The pooling window covers offsets [-padding, size - padding) around the strided anchor.
    int max_move = size - padding;
    float m;
    for (int i=0; i < output_depth; i++) {
        for (int j=0; j < output_width; j++) {
            for (int k=0; k < output_width; k++) {
                m = -FLT_MAX;
                for (int a=-padding; a < max_move; a++) {
                    for (int b=-padding; b < max_move; b++) {
                        int j_2 = stride*j +a;
                        int k_2 = stride*k +b;
                        // NOTE(review): j_2/k_2 index into `input`, yet the upper bound
                        // used here is output_width — this looks like it should be the
                        // input width. TODO confirm against the allocation of `input`.
                        if (pooling_not_outside(j_2, k_2, 0, output_width)) {
                            m = fmaxf(m, input[i][j_2][k_2]);
                        }
                    }
                }
                output[i][j][k] = m;
            }
        }
    }
}
#ifdef __CUDACC__
extern "C"
#endif
/*
 * Max pooling entry point: dispatches to the GPU implementation when compiled
 * with nvcc, otherwise to the CPU implementation.
 */
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    #ifndef __CUDACC__
    make_max_pooling_cpu(input, output, size, output_depth, output_width, stride, padding);
    #else
    make_max_pooling_device(input, output, size, output_depth, output_width, stride, padding);
    #endif
}