diff --git a/src/cnn/cnn.c b/src/cnn/cnn.c
index 8216fac..90de023 100644
--- a/src/cnn/cnn.c
+++ b/src/cnn/cnn.c
@@ -177,6 +177,8 @@ void forward_propagation(Network* network) {
         int activation = k_i->activation;
         int pooling = k_i->pooling;
+        int stride = k_i->stride;
+        int padding = k_i->padding;
 
         if (k_i->nn) {
             drop_neurones(input, 1, 1, input_width, network->dropout);
@@ -189,29 +191,33 @@ void forward_propagation(Network* network) {
         * Copy the values of output into output_z, then apply the activation function to output_z
         */
         if (k_i->cnn) { // Convolution
-            make_convolution(k_i->cnn, input, output, output_width, 1);
+            make_convolution(k_i->cnn, input, output, output_width, stride, padding);
             copy_3d_array(output, output_z, output_depth, output_width, output_width);
             apply_function_to_matrix(activation, output, output_depth, output_width);
         } else if (k_i->nn) { // Fully connected
             if (k_i->linearisation == DOESNT_LINEARISE) { // Vector -> Vector
                 make_dense(k_i->nn, input[0][0], output[0][0], input_width, output_width);
-            } else { // Matrix -> Vector
+            }
+            else { // Matrix -> Vector
                 make_dense_linearized(k_i->nn, input, output[0][0], input_depth, input_width, output_width);
             }
             copy_3d_array(output, output_z, 1, 1, output_width);
             apply_function_to_vector(activation, output, output_width);
         } else { // Pooling
+            int kernel_size = 2*padding + input_width + stride - output_width*stride;
             if (i == n-2) {
                 printf_error("Le réseau ne peut pas finir par un pooling layer\n");
                 return;
             } else { // Pooling over a matrix
                 if (pooling == AVG_POOLING) {
-                    make_average_pooling(input, output, input_width/output_width, output_depth, output_width, input_width/output_width);
-                } else if (pooling == MAX_POOLING) {
-                    make_max_pooling(input, output, input_width/output_width, output_depth, output_width, input_width/output_width);
-                } else {
+                    make_average_pooling(input, output, kernel_size, output_depth, output_width, stride, padding);
+                }
+                else if (pooling == MAX_POOLING) {
+                    make_max_pooling(input, output, kernel_size, output_depth, output_width, stride, padding);
+                }
+                else {
                     printf_error("Impossible de reconnaître le type de couche de pooling: ");
                     printf("identifiant: %d, position: %d\n", pooling, i);
                 }
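For reference, the pooling branch above recovers the pooling kernel size from the layer geometry by inverting output_width = (2*padding + input_width - kernel_size)/stride + 1. A minimal standalone check of that algebra on the first LeNet-5 pooling layer (hypothetical test code, not part of this commit):

#include <assert.h>

int main() {
    // Geometry of the first LeNet-5 pooling layer: 28x28 -> 14x14, stride 2, no padding
    int input_width = 28, output_width = 14, stride = 2, padding = 0;
    // Inversion used by forward_propagation
    int kernel_size = 2*padding + input_width + stride - output_width*stride;
    assert(kernel_size == 2); // the 2x2 window passed to make_average_pooling
    return 0;
}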
diff --git a/src/cnn/convolution.c b/src/cnn/convolution.c
index d0a44be..9459d76 100644
--- a/src/cnn/convolution.c
+++ b/src/cnn/convolution.c
@@ -8,14 +8,23 @@
 
 #include "include/config.h"
 
-void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
+#ifdef __CUDACC__
+__host__ __device__
+#endif
+int convolution_not_outside(int x, int y, int lower_bound, int upper_bound) {
+    // Returns true if and only if both:
+    //     lower_bound <= x < upper_bound
+    //     lower_bound <= y < upper_bound
+    return !(x < lower_bound || y < lower_bound || x >= upper_bound || y >= upper_bound);
+}
+
+void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
     // kernel applied to input
-    // input[kernel->rows][kernel_k_size + output_dim-1][kernel_k_size + output_dim-1]
+    // input[kernel->rows][input_dim][input_dim] with input_dim as computed below
     // output[kernel->columns][output_dim][output_dim]
-    int k_size = kernel->k_size;
     int k_columns = kernel->columns;
     int k_rows = kernel->rows;
+    int max_move = kernel->k_size - padding;
+    int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride;
     float f;
 
     for (int i=0; i < k_columns; i++) { // filter
@@ -23,9 +32,13 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
             for (int k=0; k < output_dim; k++) { // output column
                 f = kernel->bias[i][j][k];
                 for (int a=0; a < k_rows; a++) { // color channel
-                    for (int b=0; b < k_size; b++) { // filter row
-                        for (int c=0; c < k_size; c++) { // filter column
-                            f += kernel->weights[a][i][b][c]*input[a][stride*j+b][stride*k+c];
+                    for (int b=-padding; b < max_move; b++) { // filter row
+                        for (int c=-padding; c < max_move; c++) { // filter column
+                            int x = stride*j + b;
+                            int y = stride*k + c;
+                            if (convolution_not_outside(x, y, 0, input_dim)) {
+                                f += kernel->weights[a][i][b+padding][c+padding]*input[a][x][y];
+                            }
                         }
                     }
                 }
@@ -37,11 +50,13 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
 
 #ifdef __CUDACC__
 
-__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
+__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
     // Respective equivalents of i, j and k in the loop executed by the CPU
     int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
     int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
     int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_dim, k_size)
+    int max_move = kernel->k_size - padding;
+    int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride;
 
     if (idx >= kernel->columns || idy >= output_dim || idz >= output_dim) {
         return;
@@ -50,9 +65,13 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
     float f = kernel->bias[idx][idy][idz];
 
     for (int a=0; a < kernel->rows; a++) {
-        for (int b=0; b < kernel->k_size; b++) {
-            for (int c=0; c < kernel->k_size; c++) {
-                f += kernel->weights[a][idx][b][c]*input[a][idy*stride+b][idz*stride+c];
+        for (int b=-padding; b < max_move; b++) {
+            for (int c=-padding; c < max_move; c++) {
+                int idy_2 = idy*stride+b;
+                int idz_2 = idz*stride+c;
+                if (convolution_not_outside(idy_2, idz_2, 0, input_dim)) {
+                    f += kernel->weights[a][idx][b+padding][c+padding]*input[a][idy_2][idz_2];
+                }
             }
         }
     }
@@ -60,21 +79,21 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
     output[idx][idy][idz] = f;
 }
 
-void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
+void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
     // Make computation
     dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim, stride);
+    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim, stride, padding);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }
 #endif
 
-void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
+void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
     #ifndef __CUDACC__
-    make_convolution_cpu(kernel, input, output, output_dim, stride);
+    make_convolution_cpu(kernel, input, output, output_dim, stride, padding);
     #else
-    make_convolution_device(kernel, input, output, output_dim, stride);
+    make_convolution_device(kernel, input, output, output_dim, stride, padding);
     #endif
 }
\ No newline at end of file
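To illustrate the boundary guard in isolation: with a 3x3 kernel, stride 1 and padding 1 on a 3x3 input (so output_dim = 3 and input_dim = 3*1 - 2*1 + 3 - 1 = 3), the corner output pixel reads offsets b, c in {-1, 0, 1}, and the taps falling outside the input are skipped, which is equivalent to zero-padding. A standalone check (hypothetical test code, not part of this commit):

#include <assert.h>

static int convolution_not_outside(int x, int y, int lower_bound, int upper_bound) {
    return !(x < lower_bound || y < lower_bound || x >= upper_bound || y >= upper_bound);
}

int main() {
    int stride = 1, padding = 1, k_size = 3, output_dim = 3;
    int input_dim = output_dim*stride - 2*padding + k_size - stride; // = 3
    int kept = 0;
    // Corner output pixel (j=0, k=0): count the taps that actually hit the input
    for (int b=-padding; b < k_size - padding; b++) {
        for (int c=-padding; c < k_size - padding; c++) {
            if (convolution_not_outside(0*stride + b, 0*stride + c, 0, input_dim)) {
                kept++;
            }
        }
    }
    assert(kept == 4); // of the 9 taps, only the 2x2 block inside the input contributes
    return 0;
}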
diff --git a/src/cnn/convolution.cu b/src/cnn/convolution.cu
index d0a44be..9459d76 100644
--- a/src/cnn/convolution.cu
+++ b/src/cnn/convolution.cu
@@ -8,14 +8,23 @@
 
 #include "include/config.h"
 
-void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
+#ifdef __CUDACC__
+__host__ __device__
+#endif
+int convolution_not_outside(int x, int y, int lower_bound, int upper_bound) {
+    // Returns true if and only if both:
+    //     lower_bound <= x < upper_bound
+    //     lower_bound <= y < upper_bound
+    return !(x < lower_bound || y < lower_bound || x >= upper_bound || y >= upper_bound);
+}
+
+void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
     // kernel applied to input
-    // input[kernel->rows][kernel_k_size + output_dim-1][kernel_k_size + output_dim-1]
+    // input[kernel->rows][input_dim][input_dim] with input_dim as computed below
     // output[kernel->columns][output_dim][output_dim]
-    int k_size = kernel->k_size;
     int k_columns = kernel->columns;
     int k_rows = kernel->rows;
+    int max_move = kernel->k_size - padding;
+    int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride;
     float f;
 
     for (int i=0; i < k_columns; i++) { // filter
@@ -23,9 +32,13 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
             for (int k=0; k < output_dim; k++) { // output column
                 f = kernel->bias[i][j][k];
                 for (int a=0; a < k_rows; a++) { // color channel
-                    for (int b=0; b < k_size; b++) { // filter row
-                        for (int c=0; c < k_size; c++) { // filter column
-                            f += kernel->weights[a][i][b][c]*input[a][stride*j+b][stride*k+c];
+                    for (int b=-padding; b < max_move; b++) { // filter row
+                        for (int c=-padding; c < max_move; c++) { // filter column
+                            int x = stride*j + b;
+                            int y = stride*k + c;
+                            if (convolution_not_outside(x, y, 0, input_dim)) {
+                                f += kernel->weights[a][i][b+padding][c+padding]*input[a][x][y];
+                            }
                         }
                     }
                 }
@@ -37,11 +50,13 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
 
 #ifdef __CUDACC__
 
-__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
+__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
     // Respective equivalents of i, j and k in the loop executed by the CPU
     int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
     int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
     int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_dim, k_size)
+    int max_move = kernel->k_size - padding;
+    int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride;
 
     if (idx >= kernel->columns || idy >= output_dim || idz >= output_dim) {
         return;
@@ -50,9 +65,13 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
     float f = kernel->bias[idx][idy][idz];
 
     for (int a=0; a < kernel->rows; a++) {
-        for (int b=0; b < kernel->k_size; b++) {
-            for (int c=0; c < kernel->k_size; c++) {
-                f += kernel->weights[a][idx][b][c]*input[a][idy*stride+b][idz*stride+c];
+        for (int b=-padding; b < max_move; b++) {
+            for (int c=-padding; c < max_move; c++) {
+                int idy_2 = idy*stride+b;
+                int idz_2 = idz*stride+c;
+                if (convolution_not_outside(idy_2, idz_2, 0, input_dim)) {
+                    f += kernel->weights[a][idx][b+padding][c+padding]*input[a][idy_2][idz_2];
+                }
             }
         }
    }
@@ -60,21 +79,21 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
     output[idx][idy][idz] = f;
 }
 
-void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
+void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
     // Make computation
     dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim, stride);
+    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim, stride, padding);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }
 #endif
 
-void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
+void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
     #ifndef __CUDACC__
-    make_convolution_cpu(kernel, input, output, output_dim, stride);
+    make_convolution_cpu(kernel, input, output, output_dim, stride, padding);
     #else
-    make_convolution_device(kernel, input, output, output_dim, stride);
+    make_convolution_device(kernel, input, output, output_dim, stride, padding);
     #endif
 }
\ No newline at end of file
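Both versions of make_convolution_device size their grid with i_div_up so that every output cell gets a thread. i_div_up is not shown in this diff; assuming it is the usual ceiling division, a minimal sketch of the intent:

// Assumed definition (not part of this diff): ceiling division, i.e. the
// smallest block count such that blocks * block_size >= n
static int i_div_up(int n, int block_size) {
    return (n + block_size - 1) / block_size;
}

// For example, with output_dim = 28 and BLOCKSIZE_y = 8, i_div_up(28, 8) == 4
// blocks are launched, and the in-kernel guard (idy >= output_dim) discards
// the 4 surplus threads of the last block.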
diff --git a/src/cnn/creation.c b/src/cnn/creation.c
index 18dea7e..023132a 100644
--- a/src/cnn/creation.c
+++ b/src/cnn/creation.c
@@ -40,10 +40,10 @@ Network* create_network(int max_size, float learning_rate, int dropout, int acti
 
 Network* create_network_lenet5(float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth) {
     Network* network = create_network(8, learning_rate, dropout, activation, initialisation, input_dim, input_depth);
-    add_convolution(network, 6, 28, activation);
-    add_average_pooling(network, 14);
-    add_convolution(network, 16, 10, activation);
-    add_average_pooling(network, 5);
+    add_convolution(network, 5, 6, 1, 0, activation);
+    add_average_pooling(network, 2, 2, 0);
+    add_convolution(network, 5, 16, 1, 0, activation);
+    add_average_pooling(network, 2, 2, 0);
     add_dense_linearisation(network, 120, activation);
     add_dense(network, 84, activation);
     add_dense(network, 10, SOFTMAX);
@@ -97,51 +97,51 @@ void create_a_line_input_z_layer(Network* network, int pos, int dim) {
     network->depth[pos] = 1;
 }
 
-void add_average_pooling(Network* network, int dim_output) {
+void add_average_pooling(Network* network, int kernel_size, int stride, int padding) {
     int n = network->size;
     int k_pos = n-1;
-    int dim_input = network->width[k_pos];
     if (network->max_size == n) {
         printf_error("Impossible de rajouter une couche d'average pooling, le réseau est déjà plein\n");
         return;
     }
-    if (dim_input%dim_output != 0) {
-        printf_error("Dimension de l'average pooling incorrecte\n");
-        return;
-    }
+    int dim_input = network->width[k_pos];
+    int dim_output = (2*padding + dim_input - (kernel_size - stride))/stride;
+
     network->kernel[k_pos]->cnn = NULL;
     network->kernel[k_pos]->nn = NULL;
+    network->kernel[k_pos]->stride = stride;
+    network->kernel[k_pos]->padding = padding;
     network->kernel[k_pos]->activation = IDENTITY; // Does not contain an activation function
     network->kernel[k_pos]->linearisation = DOESNT_LINEARISE;
     network->kernel[k_pos]->pooling = AVG_POOLING;
-    create_a_cube_input_layer(network, n, network->depth[n-1], network->width[n-1]/2);
-    create_a_cube_input_z_layer(network, n, network->depth[n-1], network->width[n-1]/2);
+
+    create_a_cube_input_layer(network, n, network->depth[n-1], dim_output);
+    create_a_cube_input_z_layer(network, n, network->depth[n-1], dim_output);
     network->size++;
 }
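With the new signatures, each call states the kernel geometry explicitly and the layer's output width follows dim_output = (2*padding + dim_input - (kernel_size - stride))/stride. A quick standalone check that the LeNet-5 sequence above reproduces the classic 32 -> 28 -> 14 -> 10 -> 5 widths (hypothetical test code, not part of this commit):

#include <assert.h>

static int out_dim(int dim_input, int kernel_size, int stride, int padding) {
    return (2*padding + dim_input - (kernel_size - stride))/stride;
}

int main() {
    int d = 32;              // assumed LeNet-5 input width
    d = out_dim(d, 5, 1, 0); // add_convolution(network, 5, 6, 1, 0, activation)
    assert(d == 28);
    d = out_dim(d, 2, 2, 0); // add_average_pooling(network, 2, 2, 0)
    assert(d == 14);
    d = out_dim(d, 5, 1, 0); // add_convolution(network, 5, 16, 1, 0, activation)
    assert(d == 10);
    d = out_dim(d, 2, 2, 0); // add_average_pooling(network, 2, 2, 0)
    assert(d == 5);
    return 0;
}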
 
-void add_max_pooling(Network* network, int dim_output) {
+void add_max_pooling(Network* network, int kernel_size, int stride, int padding) {
     int n = network->size;
     int k_pos = n-1;
-    int dim_input = network->width[k_pos];
     if (network->max_size == n) {
         printf_error("Impossible de rajouter une couche de max pooling, le réseau est déjà plein\n");
         return;
     }
-    if (dim_input%dim_output != 0) {
-        printf_error("Dimension du max pooling incorrecte\n");
-        return;
-    }
+    int dim_input = network->width[k_pos];
+    int dim_output = (2*padding + dim_input - (kernel_size - stride))/stride;
+
     network->kernel[k_pos]->cnn = NULL;
     network->kernel[k_pos]->nn = NULL;
+    network->kernel[k_pos]->stride = stride;
+    network->kernel[k_pos]->padding = padding;
     network->kernel[k_pos]->activation = IDENTITY; // Does not contain an activation function
     network->kernel[k_pos]->linearisation = DOESNT_LINEARISE;
     network->kernel[k_pos]->pooling = MAX_POOLING;
-    create_a_cube_input_layer(network, n, network->depth[n-1], network->width[n-1]/2);
-    create_a_cube_input_z_layer(network, n, network->depth[n-1], network->width[n-1]/2);
+
+    create_a_cube_input_layer(network, n, network->depth[n-1], dim_output);
+    create_a_cube_input_z_layer(network, n, network->depth[n-1], dim_output);
     network->size++;
 }
 
-void add_convolution(Network* network, int depth_output, int dim_output, int activation) {
+void add_convolution(Network* network, int kernel_size, int number_of_kernels, int stride, int padding, int activation) {
     int n = network->size;
     int k_pos = n-1;
     if (network->max_size == n) {
@@ -151,18 +151,24 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act
     int depth_input = network->depth[k_pos];
     int dim_input = network->width[k_pos];
+    int dim_output = (2*padding + dim_input - (kernel_size - stride))/stride;
+    int depth_output = number_of_kernels;
+    int bias_size = dim_output;
-    int kernel_size = dim_input - dim_output +1;
+
     network->kernel[k_pos]->nn = NULL;
+    network->kernel[k_pos]->stride = stride;
+    network->kernel[k_pos]->padding = padding;
     network->kernel[k_pos]->activation = activation;
     network->kernel[k_pos]->linearisation = DOESNT_LINEARISE;
     network->kernel[k_pos]->pooling = NO_POOLING;
+
     network->kernel[k_pos]->cnn = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn));
     Kernel_cnn* cnn = network->kernel[k_pos]->cnn;
     cnn->k_size = kernel_size;
     cnn->rows = depth_input;
     cnn->columns = depth_output;
+
     cnn->weights = (float****)nalloc(depth_input, sizeof(float***));
     cnn->d_weights = (float****)nalloc(depth_input, sizeof(float***));
 #ifdef ADAM_CNN_WEIGHTS
@@ -200,6 +206,7 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act
             }
         }
     }
+
     cnn->bias = (float***)nalloc(depth_output, sizeof(float**));
     cnn->d_bias = (float***)nalloc(depth_output, sizeof(float**));
 #ifdef ADAM_CNN_BIAS
@@ -229,6 +236,7 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act
             }
         }
     }
+
     int n_in = network->width[n-1]*network->width[n-1]*network->depth[n-1];
     int n_out = network->width[n]*network->width[n]*network->depth[n];
     initialisation_3d_matrix(network->initialisation, cnn->bias, depth_output, dim_output, dim_output, n_in, n_out);
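The dim_output expression above is the textbook floor((dim_input + 2*padding - kernel_size)/stride) + 1 with the + 1 folded into the numerator; the two forms always agree. When the windows do not tile the input exactly, integer division floors the result and the trailing rows and columns are simply never visited. A small check of both facts (hypothetical test code):

#include <assert.h>

int main() {
    int dim_input = 7, kernel_size = 2, stride = 2, padding = 0;
    int a = (2*padding + dim_input - (kernel_size - stride))/stride; // form used in creation.c
    int b = (dim_input + 2*padding - kernel_size)/stride + 1;        // textbook form
    assert(a == b && a == 3); // the 7th row/column is never covered by any window
    return 0;
}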
 
@@ -247,13 +255,17 @@ void add_dense(Network* network, int size_output, int activation) {
         return;
     }
     network->kernel[k_pos]->cnn = NULL;
-    network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn));
-    Kernel_nn* nn = network->kernel[k_pos]->nn;
+    network->kernel[k_pos]->stride = -1; // Not used in a dense layer
+    network->kernel[k_pos]->padding = -1; // Not used in a dense layer
     network->kernel[k_pos]->activation = activation;
     network->kernel[k_pos]->linearisation = DOESNT_LINEARISE;
     network->kernel[k_pos]->pooling = NO_POOLING;
+
+    network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn));
+    Kernel_nn* nn = network->kernel[k_pos]->nn;
     nn->size_input = size_input;
     nn->size_output = size_output;
+
     nn->bias = (float*)nalloc(size_output, sizeof(float));
     nn->d_bias = (float*)nalloc(size_output, sizeof(float));
 #ifdef ADAM_DENSE_BIAS
@@ -289,7 +301,7 @@ void add_dense(Network* network, int size_output, int activation) {
 #endif
         }
     }
-    
+
     initialisation_1d_matrix(network->initialisation, nn->bias, size_output, size_input, size_output);
     initialisation_2d_matrix(network->initialisation, nn->weights, size_input, size_output, size_input, size_output);
     create_a_line_input_layer(network, n, size_output);
@@ -308,11 +320,14 @@ void add_dense_linearisation(Network* network, int size_output, int activation)
         return;
     }
     network->kernel[k_pos]->cnn = NULL;
-    network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn));
-    Kernel_nn* nn = network->kernel[k_pos]->nn;
+    network->kernel[k_pos]->stride = -1; // Not used in a dense layer
+    network->kernel[k_pos]->padding = -1; // Not used in a dense layer
     network->kernel[k_pos]->activation = activation;
     network->kernel[k_pos]->linearisation = DO_LINEARISE;
     network->kernel[k_pos]->pooling = NO_POOLING;
+
+    network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn));
+    Kernel_nn* nn = network->kernel[k_pos]->nn;
     nn->size_input = size_input;
     nn->size_output = size_output;
 
@@ -329,6 +344,7 @@ void add_dense_linearisation(Network* network, int size_output, int activation)
         nn->v_d_bias[i] = 0.;
 #endif
     }
+
     nn->weights = (float**)nalloc(size_input, sizeof(float*));
     nn->d_weights = (float**)nalloc(size_input, sizeof(float*));
 #ifdef ADAM_DENSE_WEIGHTS
@@ -350,6 +366,7 @@ void add_dense_linearisation(Network* network, int size_output, int activation)
 #endif
         }
     }
+
     initialisation_1d_matrix(network->initialisation, nn->bias, size_output, size_input, size_output);
     initialisation_2d_matrix(network->initialisation, nn->weights, size_input, size_output, size_input, size_output);
     create_a_line_input_layer(network, n, size_output);
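Taken together, callers now choose kernel geometry instead of target dimensions. A hypothetical example of assembling a small padded network with the new signatures (layer sizes are illustrative; activation and initialisation stand for whatever ids the caller already uses):

void build_small_network(float learning_rate, int dropout, int activation, int initialisation) {
    // 28x28x1 input
    Network* network = create_network(5, learning_rate, dropout, activation, initialisation, 28, 1);
    // 3x3 kernels, 8 of them, stride 1, padding 1 -> 28x28x8 ("same" convolution)
    add_convolution(network, 3, 8, 1, 1, activation);
    // 2x2 max pooling, stride 2, no padding -> 14x14x8
    add_max_pooling(network, 2, 2, 0);
    // flatten 14*14*8 = 1568 values -> 10 outputs
    add_dense_linearisation(network, 10, SOFTMAX);
}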
 * Determines at compile time whether the convolution can be run on the graphics card
 */
-void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
\ No newline at end of file
+void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding);
\ No newline at end of file
diff --git a/src/cnn/include/creation.h b/src/cnn/include/creation.h
index 3ee8611..759a015 100644
--- a/src/cnn/include/creation.h
+++ b/src/cnn/include/creation.h
@@ -35,19 +35,24 @@ void create_a_cube_input_z_layer(Network* network, int pos, int depth, int dim);
 void create_a_line_input_layer(Network* network, int pos, int dim);
 
 /*
-* Adds a valid dim*dim average pooling layer to the network
+* Adds an average pooling layer to the network with the chosen
+* kernel size (kernel_size), padding and stride
 */
-void add_average_pooling(Network* network, int dim_output);
+void add_average_pooling(Network* network, int kernel_size, int stride, int padding);
 
 /*
-* Adds a valid dim*dim max pooling layer to the network
+* Adds a max pooling layer to the network with the chosen
+* kernel size (kernel_size), padding and stride
 */
-void add_max_pooling(Network* network, int dim_output);
+void add_max_pooling(Network* network, int kernel_size, int stride, int padding);
 
 /*
-* Adds a dim*dim convolution layer to the network and initialises the kernels
+* Adds a convolution layer to the network with the chosen kernel size (kernel_size),
+* padding and stride. The depth of the next layer is chosen
+* with number_of_kernels (= depth_output).
+* Then initialises the weights and biases
 */
-void add_convolution(Network* network, int depth_output, int dim_output, int activation);
+void add_convolution(Network* network, int kernel_size, int number_of_kernels, int stride, int padding, int activation);
 
 /*
 * Adds a dense layer to the network and initialises the weights and biases
diff --git a/src/cnn/include/make.h b/src/cnn/include/make.h
index bf7930c..034fc43 100644
--- a/src/cnn/include/make.h
+++ b/src/cnn/include/make.h
@@ -4,36 +4,44 @@
 #define DEF_MAKE_H
 
 /*
-* Performs a convolution without stride on the processor
+* Returns true if and only if both:
+*     lower_bound <= x < upper_bound
+*     lower_bound <= y < upper_bound
 */
-void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
+#ifdef __CUDACC__
+__host__ __device__
+#endif
+int pooling_not_outside(int x, int y, int lower_bound, int upper_bound);
 
 /*
-* Performs the convolution on the CPU or the GPU
+* Performs the forward pass of a convolution with the chosen stride and padding on the processor
 */
-void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
+void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding);
+
+/*
+* Performs the forward pass of a convolution with the chosen stride and padding on the CPU or the GPU
+*/
+void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding);
 
 #ifdef __CUDACC__
 extern "C"
 #endif
 /*
-* Performs an average pooling with stride=size
+* Performs the forward pass of average pooling with the chosen stride and padding
 */
-void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride);
+void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride, int padding);
 
 #ifdef __CUDACC__
 extern "C"
 #endif
 /*
-* Performs a max pooling with stride=size
+* Performs the forward pass of max pooling with the chosen stride and padding
 */
-void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride);
+void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride, int padding);
 
 #ifdef __CUDACC__
 extern "C"
 #endif
 /*
-* Performs a full connection
+* Performs the forward pass of a dense layer
 */
 void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input, int size_output);
 
@@ -41,7 +49,7 @@ void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input, 
 extern "C"
 #endif
 /*
-* Performs a full connection that goes from a matrix to a vector
+* Performs the forward pass of a dense layer that goes from a matrix to a vector
 */
 void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output);
diff --git a/src/cnn/include/struct.h b/src/cnn/include/struct.h
index 2e04481..6b8ab63 100644
--- a/src/cnn/include/struct.h
+++ b/src/cnn/include/struct.h
@@ -16,14 +16,14 @@ typedef struct Kernel_cnn {
     int rows; // Depth of the input
     int columns; // Depth of the output
 
-    float*** bias; // bias[columns][dim_output][dim_output]
+    float*** bias; // bias[columns][dim_output][dim_output] <=> bias[depth output][dim output][dim output]
     float*** d_bias; // d_bias[columns][dim_output][dim_output]
 #ifdef ADAM_CNN_BIAS
     float*** s_d_bias; // s_d_bias[columns][dim_output][dim_output]
     float*** v_d_bias; // v_d_bias[columns][dim_output][dim_output]
 #endif
 
-    float**** weights; // weights[rows][columns][k_size][k_size]
+    float**** weights; // weights[rows][columns][k_size][k_size] <=> weights[depth input][depth output][size kernel][size kernel]
     float**** d_weights; // d_weights[rows][columns][k_size][k_size]
 #ifdef ADAM_CNN_WEIGHTS
     float**** s_d_weights; // s_d_weights[rows][columns][k_size][k_size]
@@ -58,6 +58,8 @@ typedef struct Kernel {
     int activation; // Id of the activation function, and -Id of its derivative
     int linearisation; // 1 if it is the linearisation of a layer, 0 otherwise
     int pooling; // 0 if no pooling, 1 if average pooling, 2 if max pooling
+    int stride; // Only used for a pooling or a convolution layer
+    int padding; // Only used for a pooling or a convolution layer
 } Kernel;
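As a concrete reading of those shape comments, the first LeNet-5 convolution built above has rows = 1, columns = 6, k_size = 5 and dim_output = 28, hence:

// weights[1][6][5][5]: one 5x5 filter per (input channel, output channel) pair
int n_weights = 1 * 6 * 5 * 5;   // 150 weight parameters
// bias[6][28][28]: this layout stores one bias per output *pixel*,
// not one bias per filter as in most CNN formulations
int n_bias = 6 * 28 * 28;        // 4704 bias parameters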
diff --git a/src/cnn/make.c b/src/cnn/make.c
index 5aca3ad..3a6572b 100644
--- a/src/cnn/make.c
+++ b/src/cnn/make.c
@@ -10,59 +10,72 @@
 
 #include "include/config.h"
 
+#ifdef __CUDACC__
+__host__ __device__
+#endif
+int pooling_not_outside(int x, int y, int lower_bound, int upper_bound) {
+    // Returns true if and only if both:
+    //     lower_bound <= x < upper_bound
+    //     lower_bound <= y < upper_bound
+    return !(x < lower_bound || y < lower_bound || x >= upper_bound || y >= upper_bound);
+}
 
 /*
 * Average Pooling
 */
 #ifdef __CUDACC__
-__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
+__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
     // Respective equivalents of i, j and k in the loop executed by the CPU
     int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
     int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
     int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width
-    int n = size*size;
+    int max_move = size - padding;
+    int input_width = output_width*stride - 2*padding + size - stride;
 
     if (idx >= output_depth || idy >= output_width || idz >= output_width) {
         return;
     }
 
+    int nb_elements = 0;
     float sum = 0;
-    for (int a=0; a < size; a++) {
-        for (int b=0; b < size; b++) {
-            sum += input[idx][stride*idy +a][stride*idz +b];
+    for (int a=-padding; a < max_move; a++) {
+        for (int b=-padding; b < max_move; b++) {
+            int idy_2 = stride*idy +a;
+            int idz_2 = stride*idz +b;
+            if (pooling_not_outside(idy_2, idz_2, 0, input_width)) {
+                sum += input[idx][idy_2][idz_2];
+                nb_elements++;
+            }
         }
     }
-    output[idx][idy][idz] = sum/(float)n;
+    output[idx][idy][idz] = sum/(float)nb_elements;
 }
 
-void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
+void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
     // Make computation
     dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride);
+    make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride, padding);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }
 #endif
 
-void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
+void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
-    // input[output_depth][output_width+size-1][output_width+size-1]
+    // input[output_depth][input_width][input_width] with input_width as computed below
     // output[output_depth][output_width][output_width]
-    float sum;
-    int n = size*size;
+    int max_move = size - padding;
+    int input_width = output_width*stride - 2*padding + size - stride;
 
     for (int i=0; i < output_depth; i++) {
         for (int j=0; j < output_width; j++) {
             for (int k=0; k < output_width; k++) {
-                sum = 0;
-                for (int a=0; a < size; a++) {
-                    for (int b=0; b < size; b++) {
-                        sum += input[i][stride*j +a][stride*k +b];
+                float sum = 0.;
+                int nb_elements = 0;
+                for (int a=-padding; a < max_move; a++) {
+                    for (int b=-padding; b < max_move; b++) {
+                        int j_2 = stride*j +a;
+                        int k_2 = stride*k +b;
+                        if (pooling_not_outside(j_2, k_2, 0, input_width)) {
+                            sum += input[i][j_2][k_2];
+                            nb_elements++;
+                        }
                     }
                 }
-                output[i][j][k] = sum/(float)n;
+                output[i][j][k] = sum/(float)nb_elements;
             }
         }
     }
 }
 
@@ -71,11 +84,11 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
 #ifdef __CUDACC__
 extern "C"
 #endif
-void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
+void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
     #ifndef __CUDACC__
-    make_average_pooling_cpu(input, output, size, output_depth, output_width, stride);
+    make_average_pooling_cpu(input, output, size, output_depth, output_width, stride, padding);
     #else
-    make_average_pooling_device(input, output, size, output_depth, output_width, stride);
+    make_average_pooling_device(input, output, size, output_depth, output_width, stride, padding);
     #endif
 }
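Dividing by nb_elements rather than by size*size means positions falling outside the input are excluded from the average (the behaviour other frameworks call count_include_pad=false). For a 2x2 average pool with stride 2 and padding 1 on a 2x2 input, each corner window covers exactly one real pixel, so the sum is divided by 1, not 4. A standalone check (hypothetical test code, not part of this commit):

#include <assert.h>

static int pooling_not_outside(int x, int y, int lower_bound, int upper_bound) {
    return !(x < lower_bound || y < lower_bound || x >= upper_bound || y >= upper_bound);
}

int main() {
    // 2x2 input, 2x2 window, stride 2, padding 1 -> 2x2 output
    int size = 2, stride = 2, padding = 1, output_width = 2;
    int input_width = output_width*stride - 2*padding + size - stride; // = 2
    int nb_elements = 0;
    for (int a=-padding; a < size - padding; a++) {
        for (int b=-padding; b < size - padding; b++) {
            if (pooling_not_outside(0*stride + a, 0*stride + b, 0, input_width)) {
                nb_elements++;
            }
        }
    }
    assert(nb_elements == 1); // the corner window sees a single real pixel
    return 0;
}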
@@ -87,7 +100,7 @@ void make_average_pooling(float*** input, float*** output, int size, int output_
 
 /*
 * Max Pooling
 */
 #ifdef __CUDACC__
-__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
+__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
     // Respective equivalents of i, j and k in the loop executed by the CPU
     int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
     int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
@@ -97,40 +110,50 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
         return;
     }
 
+    int max_move = size - padding;
+    int input_width = output_width*stride - 2*padding + size - stride;
     float m = -FLT_MAX;
     float temp;
-    for (int a=0; a < size; a++) {
-        for (int b=0; b < size; b++) {
-            temp = input[idx][stride*idy +a][stride*idz +b];
-            m = m > temp ? m : temp; // max(m, temp)
+    for (int a=-padding; a < max_move; a++) {
+        for (int b=-padding; b < max_move; b++) {
+            int idy_2 = stride*idy +a;
+            int idz_2 = stride*idz +b;
+            if (pooling_not_outside(idy_2, idz_2, 0, input_width)) {
+                temp = input[idx][idy_2][idz_2];
+                m = m > temp ? m : temp; // max(m, temp)
+            }
         }
     }
     output[idx][idy][idz] = m;
 }
 
-void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
+void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
     // Make computation
     dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride);
+    make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride, padding);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }
 #endif
 
-void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
+void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
-    // input[output_depth][output_width+size-1][output_width+size-1]
+    // input[output_depth][input_width][input_width] with input_width as computed below
     // output[output_depth][output_width][output_width]
+    int max_move = size - padding;
+    int input_width = output_width*stride - 2*padding + size - stride;
     float m;
     for (int i=0; i < output_depth; i++) {
         for (int j=0; j < output_width; j++) {
             for (int k=0; k < output_width; k++) {
                 m = -FLT_MAX;
-                for (int a=0; a < size; a++) {
-                    for (int b=0; b < size; b++) {
-                        m = fmaxf(m, input[i][stride*j +a][stride*k +b]);
+                for (int a=-padding; a < max_move; a++) {
+                    for (int b=-padding; b < max_move; b++) {
+                        int j_2 = stride*j +a;
+                        int k_2 = stride*k +b;
+                        if (pooling_not_outside(j_2, k_2, 0, input_width)) {
+                            m = fmaxf(m, input[i][j_2][k_2]);
+                        }
                    }
                }
                output[i][j][k] = m;
@@ -142,11 +165,11 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
 #ifdef __CUDACC__
 extern "C"
 #endif
-void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
+void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
     #ifndef __CUDACC__
-    make_max_pooling_cpu(input, output, size, output_depth, output_width, stride);
+    make_max_pooling_cpu(input, output, size, output_depth, output_width, stride, padding);
     #else
-    make_max_pooling_device(input, output, size, output_depth, output_width, stride);
+    make_max_pooling_device(input, output, size, output_depth, output_width, stride, padding);
     #endif
 }
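Seeding m with -FLT_MAX would propagate -FLT_MAX for a window that never hits the input, but with the dim_output formula used in creation.c every window overlaps the input by at least one pixel as long as padding < size, so valid geometries are safe. A worked 4x4 example of the padded max pool, re-implementing the CPU loop above in standalone form (hypothetical test code):

#include <assert.h>
#include <float.h>
#include <math.h>

int main() {
    float in[4][4];
    for (int i=0; i < 4; i++)
        for (int j=0; j < 4; j++)
            in[i][j] = (float)(i*4 + j); // 0..15, increasing row by row
    // 2x2 window, stride 2, padding 1 on a 4x4 input -> 3x3 output
    int size = 2, stride = 2, padding = 1, output_width = 3;
    int input_width = output_width*stride - 2*padding + size - stride; // = 4
    float out[3][3];
    for (int j=0; j < output_width; j++) {
        for (int k=0; k < output_width; k++) {
            float m = -FLT_MAX;
            for (int a=-padding; a < size - padding; a++) {
                for (int b=-padding; b < size - padding; b++) {
                    int j_2 = stride*j + a;
                    int k_2 = stride*k + b;
                    if (!(j_2 < 0 || k_2 < 0 || j_2 >= input_width || k_2 >= input_width))
                        m = fmaxf(m, in[j_2][k_2]);
                }
            }
            out[j][k] = m;
        }
    }
    assert(out[0][0] == 0.0f);  // corner window only sees in[0][0]
    assert(out[1][1] == 10.0f); // central window sees in[1..2][1..2]
    assert(out[2][2] == 15.0f); // opposite corner only sees in[3][3]
    return 0;
}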
diff --git a/src/cnn/make.cu b/src/cnn/make.cu
index 5aca3ad..3a6572b 100644
--- a/src/cnn/make.cu
+++ b/src/cnn/make.cu
@@ -10,59 +10,72 @@
 
 #include "include/config.h"
 
+#ifdef __CUDACC__
+__host__ __device__
+#endif
+int pooling_not_outside(int x, int y, int lower_bound, int upper_bound) {
+    // Returns true if and only if both:
+    //     lower_bound <= x < upper_bound
+    //     lower_bound <= y < upper_bound
+    return !(x < lower_bound || y < lower_bound || x >= upper_bound || y >= upper_bound);
+}
 
 /*
 * Average Pooling
 */
 #ifdef __CUDACC__
-__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
+__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
     // Respective equivalents of i, j and k in the loop executed by the CPU
     int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
     int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
     int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width
-    int n = size*size;
+    int max_move = size - padding;
+    int input_width = output_width*stride - 2*padding + size - stride;
 
     if (idx >= output_depth || idy >= output_width || idz >= output_width) {
         return;
     }
 
+    int nb_elements = 0;
     float sum = 0;
-    for (int a=0; a < size; a++) {
-        for (int b=0; b < size; b++) {
-            sum += input[idx][stride*idy +a][stride*idz +b];
+    for (int a=-padding; a < max_move; a++) {
+        for (int b=-padding; b < max_move; b++) {
+            int idy_2 = stride*idy +a;
+            int idz_2 = stride*idz +b;
+            if (pooling_not_outside(idy_2, idz_2, 0, input_width)) {
+                sum += input[idx][idy_2][idz_2];
+                nb_elements++;
+            }
         }
     }
-    output[idx][idy][idz] = sum/(float)n;
+    output[idx][idy][idz] = sum/(float)nb_elements;
 }
 
-void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
+void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
     // Make computation
     dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride);
+    make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride, padding);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }
 #endif
 
-void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
+void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
-    // input[output_depth][output_width+size-1][output_width+size-1]
+    // input[output_depth][input_width][input_width] with input_width as computed below
     // output[output_depth][output_width][output_width]
-    float sum;
-    int n = size*size;
+    int max_move = size - padding;
+    int input_width = output_width*stride - 2*padding + size - stride;
 
     for (int i=0; i < output_depth; i++) {
         for (int j=0; j < output_width; j++) {
             for (int k=0; k < output_width; k++) {
-                sum = 0;
-                for (int a=0; a < size; a++) {
-                    for (int b=0; b < size; b++) {
-                        sum += input[i][stride*j +a][stride*k +b];
+                float sum = 0.;
+                int nb_elements = 0;
+                for (int a=-padding; a < max_move; a++) {
+                    for (int b=-padding; b < max_move; b++) {
+                        int j_2 = stride*j +a;
+                        int k_2 = stride*k +b;
+                        if (pooling_not_outside(j_2, k_2, 0, input_width)) {
+                            sum += input[i][j_2][k_2];
+                            nb_elements++;
+                        }
                    }
                }
-                output[i][j][k] = sum/(float)n;
+                output[i][j][k] = sum/(float)nb_elements;
            }
        }
    }
}
 
@@ -71,11 +84,11 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
 #ifdef __CUDACC__
 extern "C"
 #endif
-void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
+void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
     #ifndef __CUDACC__
-    make_average_pooling_cpu(input, output, size, output_depth, output_width, stride);
+    make_average_pooling_cpu(input, output, size, output_depth, output_width, stride, padding);
     #else
-    make_average_pooling_device(input, output, size, output_depth, output_width, stride);
+    make_average_pooling_device(input, output, size, output_depth, output_width, stride, padding);
     #endif
 }
 
@@ -87,7 +100,7 @@ void make_average_pooling(float*** input, float*** output, int size, int output_
 
 /*
 * Max Pooling
 */
 #ifdef __CUDACC__
-__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
+__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
     // Respective equivalents of i, j and k in the loop executed by the CPU
     int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
     int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
@@ -97,40 +110,50 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
         return;
     }
 
+    int max_move = size - padding;
+    int input_width = output_width*stride - 2*padding + size - stride;
     float m = -FLT_MAX;
     float temp;
-    for (int a=0; a < size; a++) {
-        for (int b=0; b < size; b++) {
-            temp = input[idx][stride*idy +a][stride*idz +b];
-            m = m > temp ? m : temp; // max(m, temp)
+    for (int a=-padding; a < max_move; a++) {
+        for (int b=-padding; b < max_move; b++) {
+            int idy_2 = stride*idy +a;
+            int idz_2 = stride*idz +b;
+            if (pooling_not_outside(idy_2, idz_2, 0, input_width)) {
+                temp = input[idx][idy_2][idz_2];
+                m = m > temp ? m : temp; // max(m, temp)
+            }
         }
     }
     output[idx][idy][idz] = m;
 }
 
-void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
+void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
     // Make computation
     dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride);
+    make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride, padding);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }
 #endif
 
-void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
+void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
-    // input[output_depth][output_width+size-1][output_width+size-1]
+    // input[output_depth][input_width][input_width] with input_width as computed below
     // output[output_depth][output_width][output_width]
+    int max_move = size - padding;
+    int input_width = output_width*stride - 2*padding + size - stride;
     float m;
     for (int i=0; i < output_depth; i++) {
         for (int j=0; j < output_width; j++) {
             for (int k=0; k < output_width; k++) {
                 m = -FLT_MAX;
-                for (int a=0; a < size; a++) {
-                    for (int b=0; b < size; b++) {
-                        m = fmaxf(m, input[i][stride*j +a][stride*k +b]);
+                for (int a=-padding; a < max_move; a++) {
+                    for (int b=-padding; b < max_move; b++) {
+                        int j_2 = stride*j +a;
+                        int k_2 = stride*k +b;
+                        if (pooling_not_outside(j_2, k_2, 0, input_width)) {
+                            m = fmaxf(m, input[i][j_2][k_2]);
+                        }
                    }
                }
                output[i][j][k] = m;
@@ -142,11 +165,11 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
 #ifdef __CUDACC__
 extern "C"
 #endif
-void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
+void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
     #ifndef __CUDACC__
-    make_max_pooling_cpu(input, output, size, output_depth, output_width, stride);
+    make_max_pooling_cpu(input, output, size, output_depth, output_width, stride, padding);
     #else
-    make_max_pooling_device(input, output, size, output_depth, output_width, stride);
+    make_max_pooling_device(input, output, size, output_depth, output_width, stride, padding);
     #endif
 }
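Finally, the new geometry can be sanity-checked end to end by building the LeNet-5 network and printing each layer's dimensions; activation and initialisation stand for whatever ids the caller already uses (hypothetical snippet):

#include <stdio.h>

void print_network_geometry(float learning_rate, int activation, int initialisation) {
    // Expected widths after this change: 32, 28, 14, 10, 5, then 120, 84, 10 for the dense layers
    Network* network = create_network_lenet5(learning_rate, 0, activation, initialisation, 32, 1);
    for (int i=0; i < network->size; i++) {
        printf("layer %d: width=%d, depth=%d\n", i, network->width[i], network->depth[i]);
    }
}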