mirror of
https://github.com/augustin64/projet-tipe
synced 2025-01-23 15:16:26 +01:00
Add 'stride' and 'padding' to the forward
This commit is contained in:
parent
a68805894f
commit
f316882eeb
@ -177,6 +177,8 @@ void forward_propagation(Network* network) {
|
||||
|
||||
int activation = k_i->activation;
|
||||
int pooling = k_i->pooling;
|
||||
int stride = k_i->stride;
|
||||
int padding = k_i->padding;
|
||||
|
||||
if (k_i->nn) {
|
||||
drop_neurones(input, 1, 1, input_width, network->dropout);
|
||||
@ -189,29 +191,33 @@ void forward_propagation(Network* network) {
|
||||
* On copie les valeurs de output dans output_z, puis on applique la fonction d'activation à output_z
|
||||
*/
|
||||
if (k_i->cnn) { // Convolution
|
||||
make_convolution(k_i->cnn, input, output, output_width, 1);
|
||||
make_convolution(k_i->cnn, input, output, output_width, stride, padding);
|
||||
copy_3d_array(output, output_z, output_depth, output_width, output_width);
|
||||
apply_function_to_matrix(activation, output, output_depth, output_width);
|
||||
}
|
||||
else if (k_i->nn) { // Full connection
|
||||
if (k_i->linearisation == DOESNT_LINEARISE) { // Vecteur -> Vecteur
|
||||
make_dense(k_i->nn, input[0][0], output[0][0], input_width, output_width);
|
||||
} else { // Matrice -> Vecteur
|
||||
}
|
||||
else { // Matrice -> Vecteur
|
||||
make_dense_linearized(k_i->nn, input, output[0][0], input_depth, input_width, output_width);
|
||||
}
|
||||
copy_3d_array(output, output_z, 1, 1, output_width);
|
||||
apply_function_to_vector(activation, output, output_width);
|
||||
}
|
||||
else { // Pooling
|
||||
int kernel_size = 2*padding + input_width + stride - output_width*stride;
|
||||
if (i == n-2) {
|
||||
printf_error("Le réseau ne peut pas finir par un pooling layer\n");
|
||||
return;
|
||||
} else { // Pooling sur une matrice
|
||||
if (pooling == AVG_POOLING) {
|
||||
make_average_pooling(input, output, input_width/output_width, output_depth, output_width, input_width/output_width);
|
||||
} else if (pooling == MAX_POOLING) {
|
||||
make_max_pooling(input, output, input_width/output_width, output_depth, output_width, input_width/output_width);
|
||||
} else {
|
||||
make_average_pooling(input, output, kernel_size, output_depth, output_width, stride, padding);
|
||||
}
|
||||
else if (pooling == MAX_POOLING) {
|
||||
make_max_pooling(input, output, kernel_size, output_depth, output_width, stride, padding);
|
||||
}
|
||||
else {
|
||||
printf_error("Impossible de reconnaître le type de couche de pooling: ");
|
||||
printf("identifiant: %d, position: %d\n", pooling, i);
|
||||
}
|
||||
|
@ -8,14 +8,23 @@
|
||||
#include "include/config.h"
|
||||
|
||||
|
||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||
/*
 * Bounds test used by the convolution loops to skip coordinates that fall
 * outside the input.
 *
 * Returns 1 if and only if both coordinates lie in the half-open range
 * [lower_bound, upper_bound), i.e.
 *     lower_bound <= x < upper_bound
 *     lower_bound <= y < upper_bound
 * and 0 otherwise.
 *
 * When compiled by nvcc the function is built for both host and device:
 * make_convolution_kernel is a __global__ function and may only call
 * functions that exist on the device.
 */
#ifdef __CUDACC__
__host__ __device__
#endif
int convolution_not_outside(int x, int y, int lower_bound, int upper_bound) {
    return x >= lower_bound && x < upper_bound
        && y >= lower_bound && y < upper_bound;
}
|
||||
|
||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
|
||||
// c'est le kernel de input
|
||||
// input[kernel->rows][kernel_k_size + output_dim-1][kernel_k_size + output_dim-1]
|
||||
// output[kernel->columns][output_dim][output_dim]
|
||||
|
||||
int k_size = kernel->k_size;
|
||||
int k_columns = kernel->columns;
|
||||
int k_rows = kernel->rows;
|
||||
int max_move = kernel->k_size - padding;
|
||||
int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride;
|
||||
float f;
|
||||
|
||||
for (int i=0; i < k_columns; i++) { // filtre
|
||||
@ -23,12 +32,16 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
|
||||
for (int k=0; k < output_dim; k++) { // colonne de sortie
|
||||
f = kernel->bias[i][j][k];
|
||||
for (int a=0; a < k_rows; a++) { // Canal de couleur
|
||||
for (int b=0; b < k_size; b++) { // ligne du filtre
|
||||
for (int c=0; c < k_size; c++) { // colonne du filtre
|
||||
for (int b=-padding; b < max_move; b++) { // ligne du filtre
|
||||
for (int c=-padding; c < max_move; c++) { // colonne du filtre
|
||||
int x = (stride*j+b);
|
||||
int y = (stride*k+c);
|
||||
if (convolution_not_outside(x, y, 0, input_dim)) {
|
||||
f += kernel->weights[a][i][b][c]*input[a][stride*j+b][stride*k+c];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
output[i][j][k] = f;
|
||||
}
|
||||
}
|
||||
@ -37,11 +50,13 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
|
||||
|
||||
#ifdef __CUDACC__
|
||||
|
||||
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
|
||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
|
||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
|
||||
int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_dim, k_size)
|
||||
int max_move = kernel->k_size - padding;
|
||||
int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride;
|
||||
|
||||
if (idx >= kernel->columns || idy >= output_dim || idz >= output_dim) {
|
||||
return;
|
||||
@ -50,9 +65,13 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
|
||||
float f = kernel->bias[idx][idy][idz];
|
||||
|
||||
for (int a=0; a < kernel->rows; a++) {
|
||||
for (int b=0; b < kernel->k_size; b++) {
|
||||
for (int c=0; c < kernel->k_size; c++) {
|
||||
f += kernel->weights[a][idx][b][c]*input[a][idy*stride+b][idz*stride+c];
|
||||
for (int b=-padding; b < max_move; b++) {
|
||||
for (int c=-padding; c < max_move; c++) {
|
||||
int idy_2 = idy*stride+b;
|
||||
int idz_2 = idz*stride+c;
|
||||
if (convolution_not_outside(idy_2, idz_2, 0, input_dim)) {
|
||||
f += kernel->weights[a][idx][b][c]*input[a][idy_2][idz_2];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -60,21 +79,21 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
|
||||
output[idx][idy][idz] = f;
|
||||
}
|
||||
|
||||
/*
 * Runs the convolution on the GPU.
 *
 * Launches make_convolution_kernel on a 3D grid whose axes cover
 * kernel->columns, output_dim and output_dim respectively (block counts come
 * from i_div_up, presumably a rounding-up division -- defined elsewhere).
 * After the launch, the last CUDA error is checked and the host waits for the
 * device to finish.
 */
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
    // Threads per block, then number of blocks along each axis
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
    dim3 gridSize(
        i_div_up(kernel->columns, BLOCKSIZE_x),
        i_div_up(output_dim, BLOCKSIZE_y),
        i_div_up(output_dim, BLOCKSIZE_z)
    );

    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim, stride, padding);

    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
}
|
||||
#endif
|
||||
|
||||
/*
 * Forward pass of a convolution layer with the given stride and padding.
 *
 * The implementation is chosen at compile time: the GPU version when the
 * file is compiled with nvcc (__CUDACC__ defined), the CPU version otherwise.
 */
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
#ifdef __CUDACC__
    make_convolution_device(kernel, input, output, output_dim, stride, padding);
#else
    make_convolution_cpu(kernel, input, output, output_dim, stride, padding);
#endif
}
|
@ -8,14 +8,23 @@
|
||||
#include "include/config.h"
|
||||
|
||||
|
||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||
/*
 * Bounds test used by the convolution loops to skip coordinates that fall
 * outside the input.
 *
 * Returns 1 if and only if both coordinates lie in the half-open range
 * [lower_bound, upper_bound), i.e.
 *     lower_bound <= x < upper_bound
 *     lower_bound <= y < upper_bound
 * and 0 otherwise.
 *
 * When compiled by nvcc the function is built for both host and device:
 * make_convolution_kernel is a __global__ function and may only call
 * functions that exist on the device.
 */
#ifdef __CUDACC__
__host__ __device__
#endif
int convolution_not_outside(int x, int y, int lower_bound, int upper_bound) {
    return x >= lower_bound && x < upper_bound
        && y >= lower_bound && y < upper_bound;
}
|
||||
|
||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
|
||||
// c'est le kernel de input
|
||||
// input[kernel->rows][kernel_k_size + output_dim-1][kernel_k_size + output_dim-1]
|
||||
// output[kernel->columns][output_dim][output_dim]
|
||||
|
||||
int k_size = kernel->k_size;
|
||||
int k_columns = kernel->columns;
|
||||
int k_rows = kernel->rows;
|
||||
int max_move = kernel->k_size - padding;
|
||||
int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride;
|
||||
float f;
|
||||
|
||||
for (int i=0; i < k_columns; i++) { // filtre
|
||||
@ -23,12 +32,16 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
|
||||
for (int k=0; k < output_dim; k++) { // colonne de sortie
|
||||
f = kernel->bias[i][j][k];
|
||||
for (int a=0; a < k_rows; a++) { // Canal de couleur
|
||||
for (int b=0; b < k_size; b++) { // ligne du filtre
|
||||
for (int c=0; c < k_size; c++) { // colonne du filtre
|
||||
for (int b=-padding; b < max_move; b++) { // ligne du filtre
|
||||
for (int c=-padding; c < max_move; c++) { // colonne du filtre
|
||||
int x = (stride*j+b);
|
||||
int y = (stride*k+c);
|
||||
if (convolution_not_outside(x, y, 0, input_dim)) {
|
||||
f += kernel->weights[a][i][b][c]*input[a][stride*j+b][stride*k+c];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
output[i][j][k] = f;
|
||||
}
|
||||
}
|
||||
@ -37,11 +50,13 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
|
||||
|
||||
#ifdef __CUDACC__
|
||||
|
||||
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
|
||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
|
||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
|
||||
int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_dim, k_size)
|
||||
int max_move = kernel->k_size - padding;
|
||||
int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride;
|
||||
|
||||
if (idx >= kernel->columns || idy >= output_dim || idz >= output_dim) {
|
||||
return;
|
||||
@ -50,9 +65,13 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
|
||||
float f = kernel->bias[idx][idy][idz];
|
||||
|
||||
for (int a=0; a < kernel->rows; a++) {
|
||||
for (int b=0; b < kernel->k_size; b++) {
|
||||
for (int c=0; c < kernel->k_size; c++) {
|
||||
f += kernel->weights[a][idx][b][c]*input[a][idy*stride+b][idz*stride+c];
|
||||
for (int b=-padding; b < max_move; b++) {
|
||||
for (int c=-padding; c < max_move; c++) {
|
||||
int idy_2 = idy*stride+b;
|
||||
int idz_2 = idz*stride+c;
|
||||
if (convolution_not_outside(idy_2, idz_2, 0, input_dim)) {
|
||||
f += kernel->weights[a][idx][b][c]*input[a][idy_2][idz_2];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -60,21 +79,21 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
|
||||
output[idx][idy][idz] = f;
|
||||
}
|
||||
|
||||
/*
 * Runs the convolution on the GPU.
 *
 * Launches make_convolution_kernel on a 3D grid whose axes cover
 * kernel->columns, output_dim and output_dim respectively (block counts come
 * from i_div_up, presumably a rounding-up division -- defined elsewhere).
 * After the launch, the last CUDA error is checked and the host waits for the
 * device to finish.
 */
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
    // Threads per block, then number of blocks along each axis
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
    dim3 gridSize(
        i_div_up(kernel->columns, BLOCKSIZE_x),
        i_div_up(output_dim, BLOCKSIZE_y),
        i_div_up(output_dim, BLOCKSIZE_z)
    );

    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim, stride, padding);

    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
}
|
||||
#endif
|
||||
|
||||
/*
 * Forward pass of a convolution layer with the given stride and padding.
 *
 * The implementation is chosen at compile time: the GPU version when the
 * file is compiled with nvcc (__CUDACC__ defined), the CPU version otherwise.
 */
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
#ifdef __CUDACC__
    make_convolution_device(kernel, input, output, output_dim, stride, padding);
#else
    make_convolution_cpu(kernel, input, output, output_dim, stride, padding);
#endif
}
|
@ -40,10 +40,10 @@ Network* create_network(int max_size, float learning_rate, int dropout, int acti
|
||||
|
||||
Network* create_network_lenet5(float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth) {
|
||||
Network* network = create_network(8, learning_rate, dropout, activation, initialisation, input_dim, input_depth);
|
||||
add_convolution(network, 6, 28, activation);
|
||||
add_average_pooling(network, 14);
|
||||
add_convolution(network, 16, 10, activation);
|
||||
add_average_pooling(network, 5);
|
||||
add_convolution(network, 5, 6, 1, 0, activation);
|
||||
add_average_pooling(network, 2, 2, 0);
|
||||
add_convolution(network, 5, 16, 1, 0, activation);
|
||||
add_average_pooling(network, 2, 2, 0);
|
||||
add_dense_linearisation(network, 120, activation);
|
||||
add_dense(network, 84, activation);
|
||||
add_dense(network, 10, SOFTMAX);
|
||||
@ -97,51 +97,51 @@ void create_a_line_input_z_layer(Network* network, int pos, int dim) {
|
||||
network->depth[pos] = 1;
|
||||
}
|
||||
|
||||
/*
 * Appends an average pooling layer to the network.
 *
 * kernel_size, stride and padding describe the pooling window. The width of
 * the new layer is derived from the width of the current last layer:
 *     dim_output = (2*padding + dim_input - (kernel_size - stride)) / stride
 * (integer division). The depth is unchanged.
 *
 * If the network already holds max_size layers, an error is printed and the
 * network is left untouched.
 */
void add_average_pooling(Network* network, int kernel_size, int stride, int padding) {
    int new_pos = network->size;   // index of the layer being created
    int last_pos = new_pos - 1;    // index of the kernel feeding it

    if (new_pos == network->max_size) {
        printf_error("Impossible de rajouter une couche d'average pooling, le réseau est déjà plein\n");
        return;
    }

    int width_in = network->width[last_pos];
    int width_out = (2*padding + width_in - (kernel_size - stride))/stride;

    // A pooling layer has neither convolution nor dense parameters
    network->kernel[last_pos]->nn = NULL;
    network->kernel[last_pos]->cnn = NULL;
    network->kernel[last_pos]->pooling = AVG_POOLING;
    network->kernel[last_pos]->linearisation = DOESNT_LINEARISE;
    network->kernel[last_pos]->activation = IDENTITY; // no activation function on a pooling layer
    network->kernel[last_pos]->padding = padding;
    network->kernel[last_pos]->stride = stride;

    create_a_cube_input_layer(network, new_pos, network->depth[new_pos-1], width_out);
    create_a_cube_input_z_layer(network, new_pos, network->depth[new_pos-1], width_out);
    network->size++;
}
|
||||
|
||||
/*
 * Appends a max pooling layer to the network.
 *
 * kernel_size, stride and padding describe the pooling window. The width of
 * the new layer is derived from the width of the current last layer:
 *     dim_output = (2*padding + dim_input - (kernel_size - stride)) / stride
 * (integer division). The depth is unchanged.
 *
 * If the network already holds max_size layers, an error is printed and the
 * network is left untouched.
 */
void add_max_pooling(Network* network, int kernel_size, int stride, int padding) {
    int n = network->size;
    int k_pos = n-1;
    if (network->max_size == n) {
        printf_error("Impossible de rajouter une couche de max pooling, le réseau est déjà plein\n");
        return;
    }
    int dim_input = network->width[k_pos];
    int dim_output = (2*padding + dim_input - (kernel_size - stride))/stride;

    network->kernel[k_pos]->cnn = NULL;
    network->kernel[k_pos]->nn = NULL;
    // forward_propagation reads stride and padding for pooling layers, so they
    // must be stored here just as add_average_pooling does; they were
    // previously left unset for max pooling.
    network->kernel[k_pos]->stride = stride;
    network->kernel[k_pos]->padding = padding;
    network->kernel[k_pos]->activation = IDENTITY; // no activation function on a pooling layer
    network->kernel[k_pos]->linearisation = DOESNT_LINEARISE;
    network->kernel[k_pos]->pooling = MAX_POOLING;

    create_a_cube_input_layer(network, n, network->depth[n-1], dim_output);
    create_a_cube_input_z_layer(network, n, network->depth[n-1], dim_output);
    network->size++;
}
|
||||
|
||||
void add_convolution(Network* network, int depth_output, int dim_output, int activation) {
|
||||
void add_convolution(Network* network, int kernel_size, int number_of_kernels, int stride, int padding, int activation) {
|
||||
int n = network->size;
|
||||
int k_pos = n-1;
|
||||
if (network->max_size == n) {
|
||||
@ -151,18 +151,24 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act
|
||||
int depth_input = network->depth[k_pos];
|
||||
int dim_input = network->width[k_pos];
|
||||
|
||||
int dim_output = (2*padding + dim_input - (kernel_size - stride))/stride;
|
||||
int depth_output = number_of_kernels;
|
||||
|
||||
int bias_size = dim_output;
|
||||
int kernel_size = dim_input - dim_output +1;
|
||||
|
||||
network->kernel[k_pos]->nn = NULL;
|
||||
network->kernel[k_pos]->stride = stride;
|
||||
network->kernel[k_pos]->padding = padding;
|
||||
network->kernel[k_pos]->activation = activation;
|
||||
network->kernel[k_pos]->linearisation = DOESNT_LINEARISE;
|
||||
network->kernel[k_pos]->pooling = NO_POOLING;
|
||||
|
||||
network->kernel[k_pos]->cnn = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn));
|
||||
Kernel_cnn* cnn = network->kernel[k_pos]->cnn;
|
||||
|
||||
cnn->k_size = kernel_size;
|
||||
cnn->rows = depth_input;
|
||||
cnn->columns = depth_output;
|
||||
|
||||
cnn->weights = (float****)nalloc(depth_input, sizeof(float***));
|
||||
cnn->d_weights = (float****)nalloc(depth_input, sizeof(float***));
|
||||
#ifdef ADAM_CNN_WEIGHTS
|
||||
@ -200,6 +206,7 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cnn->bias = (float***)nalloc(depth_output, sizeof(float**));
|
||||
cnn->d_bias = (float***)nalloc(depth_output, sizeof(float**));
|
||||
#ifdef ADAM_CNN_BIAS
|
||||
@ -229,6 +236,7 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int n_in = network->width[n-1]*network->width[n-1]*network->depth[n-1];
|
||||
int n_out = network->width[n]*network->width[n]*network->depth[n];
|
||||
initialisation_3d_matrix(network->initialisation, cnn->bias, depth_output, dim_output, dim_output, n_in, n_out);
|
||||
@ -247,13 +255,17 @@ void add_dense(Network* network, int size_output, int activation) {
|
||||
return;
|
||||
}
|
||||
network->kernel[k_pos]->cnn = NULL;
|
||||
network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn));
|
||||
Kernel_nn* nn = network->kernel[k_pos]->nn;
|
||||
network->kernel[k_pos]->stride = -1; // N'est pas utilisé dans une couche dense
|
||||
network->kernel[k_pos]->padding = -1; // N'est pas utilisé dans une couche dense
|
||||
network->kernel[k_pos]->activation = activation;
|
||||
network->kernel[k_pos]->linearisation = DOESNT_LINEARISE;
|
||||
network->kernel[k_pos]->pooling = NO_POOLING;
|
||||
|
||||
network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn));
|
||||
Kernel_nn* nn = network->kernel[k_pos]->nn;
|
||||
nn->size_input = size_input;
|
||||
nn->size_output = size_output;
|
||||
|
||||
nn->bias = (float*)nalloc(size_output, sizeof(float));
|
||||
nn->d_bias = (float*)nalloc(size_output, sizeof(float));
|
||||
#ifdef ADAM_DENSE_BIAS
|
||||
@ -308,11 +320,14 @@ void add_dense_linearisation(Network* network, int size_output, int activation)
|
||||
return;
|
||||
}
|
||||
network->kernel[k_pos]->cnn = NULL;
|
||||
network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn));
|
||||
Kernel_nn* nn = network->kernel[k_pos]->nn;
|
||||
network->kernel[k_pos]->stride = -1; // N'est pas utilisé dans une couche dense
|
||||
network->kernel[k_pos]->padding = -1; // N'est pas utilisé dans une couche dense
|
||||
network->kernel[k_pos]->activation = activation;
|
||||
network->kernel[k_pos]->linearisation = DO_LINEARISE;
|
||||
network->kernel[k_pos]->pooling = NO_POOLING;
|
||||
|
||||
network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn));
|
||||
Kernel_nn* nn = network->kernel[k_pos]->nn;
|
||||
nn->size_input = size_input;
|
||||
nn->size_output = size_output;
|
||||
|
||||
@ -329,6 +344,7 @@ void add_dense_linearisation(Network* network, int size_output, int activation)
|
||||
nn->v_d_bias[i] = 0.;
|
||||
#endif
|
||||
}
|
||||
|
||||
nn->weights = (float**)nalloc(size_input, sizeof(float*));
|
||||
nn->d_weights = (float**)nalloc(size_input, sizeof(float*));
|
||||
#ifdef ADAM_DENSE_WEIGHTS
|
||||
@ -350,6 +366,7 @@ void add_dense_linearisation(Network* network, int size_output, int activation)
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
initialisation_1d_matrix(network->initialisation, nn->bias, size_output, size_input, size_output);
|
||||
initialisation_2d_matrix(network->initialisation, nn->weights, size_input, size_output, size_input, size_output);
|
||||
create_a_line_input_layer(network, n, size_output);
|
||||
|
@ -3,21 +3,21 @@
|
||||
/*
|
||||
* Effectue la convolution naïvement sur le processeur
|
||||
*/
|
||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
|
||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
/*
|
||||
* Kernel de la convolution sur carte graphique
|
||||
*/
|
||||
__global__ void make_convolution_kernel(int k_size, int columns, int rows, float* bias, size_t pitch_bias, float**** weights, size_t pitch_weights, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_dim, int stride);
|
||||
__global__ void make_convolution_kernel(int k_size, int columns, int rows, float* bias, size_t pitch_bias, float**** weights, size_t pitch_weights, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_dim, int stride, int padding);
|
||||
|
||||
/*
|
||||
* Effectue la convolution naïvement sur la carte graphique
|
||||
*/
|
||||
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
|
||||
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Détermine si la convolution peut-être faite sur la carte graphique au moment de la compilation
|
||||
*/
|
||||
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
|
||||
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding);
|
@ -35,19 +35,24 @@ void create_a_cube_input_z_layer(Network* network, int pos, int depth, int dim);
|
||||
void create_a_line_input_layer(Network* network, int pos, int dim);
|
||||
|
||||
/*
|
||||
* Ajoute au réseau une couche d'average pooling valide de dimension dim*dim
|
||||
* Ajoute au réseau une couche d'average pooling avec la taille de noyau (kernel_size),
|
||||
* le remplissage (padding) et le décalage (stride) choisis
|
||||
*/
|
||||
void add_average_pooling(Network* network, int dim_output);
|
||||
void add_average_pooling(Network* network, int kernel_size, int stride, int padding);
|
||||
|
||||
/*
|
||||
* Ajoute au réseau une couche de max pooling valide de dimension dim*dim
|
||||
* Ajoute au réseau une couche de max pooling avec la taille de noyau (kernel_size),
|
||||
* le remplissage (padding) et le décalage (stride) choisis
|
||||
*/
|
||||
void add_max_pooling(Network* network, int dim_output);
|
||||
void add_max_pooling(Network* network, int kernel_size, int stride, int padding);
|
||||
|
||||
/*
|
||||
* Ajoute au réseau une couche de convolution dim*dim et initialise les kernels
|
||||
* Ajoute au réseau une couche de convolution avec la taille de noyau (kernel_size),
|
||||
* le remplissage (padding) et le décalage (stride) choisis. Le choix de la profondeur de
|
||||
* la couche suivante se fait avec number_of_kernels (= depth_output)
|
||||
* Puis initialise les poids et les biais construits
|
||||
*/
|
||||
void add_convolution(Network* network, int depth_output, int dim_output, int activation);
|
||||
void add_convolution(Network* network, int kernel_size, int number_of_kernels, int stride, int padding, int activation);
|
||||
|
||||
/*
|
||||
* Ajoute au réseau une couche dense et initialise les poids et les biais
|
||||
|
@ -4,36 +4,44 @@
|
||||
#define DEF_MAKE_H
|
||||
|
||||
/*
|
||||
* Effectue une convolution sans stride sur le processeur
|
||||
*
|
||||
* On renvoie true si et seulement si les deux conditions suivantes sont vérifiées :
|
||||
* lower_bound <= y < upper_bound
|
||||
* lower_bound <= x < upper_bound
|
||||
*/
|
||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
|
||||
int pooling_not_outside(int x, int y, int lower_bound, int upper_bound);
|
||||
|
||||
/*
|
||||
* Effectue la convolution sur le CPU ou GPU
|
||||
* Effectue la propagation d'une convolution avec stride et padding choisis sur le processeur
|
||||
*/
|
||||
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
|
||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding);
|
||||
|
||||
/*
|
||||
* Effectue la propagation d'une convolution avec stride et padding choisis sur le CPU ou GPU
|
||||
*/
|
||||
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
/*
|
||||
* Effectue un average pooling avec stride=size
|
||||
* Effectue la propagation d'un average pooling avec le stride et le padding choisis
|
||||
*/
|
||||
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride);
|
||||
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride, int padding);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
/*
|
||||
* Effectue un max pooling avec stride=size
|
||||
* Effectue la propagation d'un max pooling avec le stride et le padding choisis
|
||||
*/
|
||||
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride);
|
||||
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride, int padding);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
/*
|
||||
* Effectue une full connection
|
||||
* Effectue la propagation d'une couche dense
|
||||
*/
|
||||
void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input, int size_output);
|
||||
|
||||
@ -41,7 +49,7 @@ void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input,
|
||||
extern "C"
|
||||
#endif
|
||||
/*
|
||||
* Effectue une full connection qui passe d'une matrice à un vecteur
|
||||
* Effectue la propagation d'une couche dense qui passe d'une matrice à un vecteur
|
||||
*/
|
||||
void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output);
|
||||
|
||||
|
@ -16,14 +16,14 @@ typedef struct Kernel_cnn {
|
||||
int rows; // Depth de l'input
|
||||
int columns; // Depth de l'output
|
||||
|
||||
float*** bias; // bias[columns][dim_output][dim_output]
|
||||
float*** bias; // bias[columns][dim_output][dim_output] <=> bias[depth output][dim output][dim output]
|
||||
float*** d_bias; // d_bias[columns][dim_output][dim_output]
|
||||
#ifdef ADAM_CNN_BIAS
|
||||
float*** s_d_bias; // s_d_bias[columns][dim_output][dim_output]
|
||||
float*** v_d_bias; // v_d_bias[columns][dim_output][dim_output]
|
||||
#endif
|
||||
|
||||
float**** weights; // weights[rows][columns][k_size][k_size]
|
||||
float**** weights; // weights[rows][columns][k_size][k_size] <=> weights[depth input][depth output][size kernel][size kernel]
|
||||
float**** d_weights; // d_weights[rows][columns][k_size][k_size]
|
||||
#ifdef ADAM_CNN_WEIGHTS
|
||||
float**** s_d_weights; // s_d_weights[rows][columns][k_size][k_size]
|
||||
@ -58,6 +58,8 @@ typedef struct Kernel {
|
||||
int activation; // Id de la fonction d'activation et -Id de sa dérivée
|
||||
int linearisation; // 1 si c'est la linéarisation d'une couche, 0 sinon
|
||||
int pooling; // 0 si pas pooling, 1 si average_pooling, 2 si max_pooling
|
||||
int stride; // Valable uniquement une pooling et un cnn
|
||||
int padding; // Valable uniquement une pooling et un cnn
|
||||
} Kernel;
|
||||
|
||||
|
||||
|
@ -10,59 +10,72 @@
|
||||
|
||||
#include "include/config.h"
|
||||
|
||||
/*
 * Bounds test used by the pooling loops to skip coordinates that fall
 * outside the input.
 *
 * Returns 1 if and only if both coordinates lie in the half-open range
 * [lower_bound, upper_bound), i.e.
 *     lower_bound <= x < upper_bound
 *     lower_bound <= y < upper_bound
 * and 0 otherwise.
 *
 * When compiled by nvcc the function is built for both host and device:
 * make_average_pooling_kernel is a __global__ function and may only call
 * functions that exist on the device.
 */
#ifdef __CUDACC__
__host__ __device__
#endif
int pooling_not_outside(int x, int y, int lower_bound, int upper_bound) {
    return x >= lower_bound && x < upper_bound
        && y >= lower_bound && y < upper_bound;
}
|
||||
|
||||
/*
|
||||
* Average Pooling
|
||||
*/
|
||||
#ifdef __CUDACC__
|
||||
/*
 * CUDA kernel for average pooling with stride and padding.
 *
 * Each thread computes one output element output[idx][idy][idz], where
 * idx < output_depth and idy, idz < output_width; threads outside that range
 * return immediately.
 *
 * The pooling window spans offsets [-padding, size - padding) around
 * (stride*idy, stride*idz). Positions that fall outside the input are
 * skipped, and the average is taken over the positions actually read.
 *
 * Fixes compared to the previous version:
 *  - nb_elements is now incremented for every value added to the sum
 *    (it used to stay at 0, so every output was a division by zero);
 *  - input coordinates are checked against the input width, not
 *    output_width. The input width is recovered with the same relation the
 *    convolution code uses for input_dim;
 *  - the bounds test is written inline, so the kernel does not depend on a
 *    host-only helper.
 */
__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // Respective equivalents of i, j and k in the loop performed by the CPU version
    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width
    int max_move = size - padding;
    int input_width = output_width*stride - 2*padding + size - stride;

    if (idx >= output_depth || idy >= output_width || idz >= output_width) {
        return;
    }

    int nb_elements = 0;
    float sum = 0;

    for (int a=-padding; a < max_move; a++) {
        int idy_2 = stride*idy + a;
        if (idy_2 < 0 || idy_2 >= input_width) {
            continue; // whole row lies in the padding
        }
        for (int b=-padding; b < max_move; b++) {
            int idz_2 = stride*idz + b;
            if (idz_2 < 0 || idz_2 >= input_width) {
                continue; // column lies in the padding
            }
            sum += input[idx][idy_2][idz_2];
            nb_elements++;
        }
    }
    // A window lying entirely in the padding yields 0 instead of 0/0
    output[idx][idy][idz] = (nb_elements > 0) ? sum/(float)nb_elements : 0.f;
}
|
||||
|
||||
/*
 * Runs the average pooling on the GPU.
 *
 * Launches make_average_pooling_kernel on a 3D grid whose axes cover
 * output_depth, output_width and output_width respectively (block counts
 * come from i_div_up, presumably a rounding-up division -- defined
 * elsewhere). After the launch, the last CUDA error is checked and the host
 * waits for the device to finish.
 */
void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // Threads per block, then number of blocks along each axis
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
    dim3 gridSize(
        i_div_up(output_depth, BLOCKSIZE_x),
        i_div_up(output_width, BLOCKSIZE_y),
        i_div_up(output_width, BLOCKSIZE_z)
    );

    make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride, padding);

    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
}
|
||||
#endif
|
||||
|
||||
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
|
||||
// input[output_depth][output_width+size-1][output_width+size-1]
|
||||
// output[output_depth][output_width][output_width]
|
||||
float sum;
|
||||
int n = size*size;
|
||||
int max_move = size - padding;
|
||||
|
||||
for (int i=0; i < output_depth; i++) {
|
||||
for (int j=0; j < output_width; j++) {
|
||||
for (int k=0; k < output_width; k++) {
|
||||
sum = 0;
|
||||
for (int a=0; a < size; a++) {
|
||||
for (int b=0; b < size; b++) {
|
||||
sum += input[i][stride*j +a][stride*k +b];
|
||||
float sum = 0.;
|
||||
int nb_elements = 0;
|
||||
for (int a=-padding; a < max_move; a++) {
|
||||
for (int b=-padding; b < max_move; b++) {
|
||||
int j_2 = stride*j +a;
|
||||
int k_2 = stride*k +b;
|
||||
if (pooling_not_outside(j_2, k_2, 0, output_width)) {
|
||||
sum += input[i][j_2][k_2];
|
||||
nb_elements++;
|
||||
}
|
||||
}
|
||||
output[i][j][k] = sum/(float)n;
|
||||
}
|
||||
output[i][j][k] = sum/(float)nb_elements;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -71,11 +84,11 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
/*
 * Average pooling entry point.
 *
 * Forwards all arguments unchanged to make_average_pooling_cpu when the file
 * is built by a regular C compiler, and to make_average_pooling_device when
 * it is built by nvcc (__CUDACC__ defined).
 */
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    #ifndef __CUDACC__
    make_average_pooling_cpu(input, output, size, output_depth, output_width, stride, padding);
    #else
    make_average_pooling_device(input, output, size, output_depth, output_width, stride, padding);
    #endif
}
|
||||
|
||||
@ -87,7 +100,7 @@ void make_average_pooling(float*** input, float*** output, int size, int output_
|
||||
* Max Pooling
|
||||
*/
|
||||
#ifdef __CUDACC__
|
||||
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
|
||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
|
||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
|
||||
@ -97,40 +110,50 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
|
||||
return;
|
||||
}
|
||||
|
||||
int max_move = size - padding;
|
||||
float m = -FLT_MAX;
|
||||
float temp;
|
||||
|
||||
for (int a=0; a < size; a++) {
|
||||
for (int b=0; b < size; b++) {
|
||||
temp = input[idx][stride*idy +a][stride*idz +b];
|
||||
for (int a=-padding; a < max_move; a++) {
|
||||
for (int b=-padding; b < max_move; b++) {
|
||||
int idy_2 = stride*idy +a;
|
||||
int idz_2 = stride*idz +b;
|
||||
if (pooling_not_outside(idy_2, idz_2, 0, output_width)) {
|
||||
temp = input[idx][idy_2][idz_2];
|
||||
m = m > temp ? m : temp; // max(m, temp)
|
||||
}
|
||||
}
|
||||
}
|
||||
output[idx][idy][idz] = m;
|
||||
}
|
||||
|
||||
/*
 * Launches make_max_pooling_kernel with one thread per output cell.
 *
 * The grid is sized with i_div_up so that it covers
 * output_depth x output_width x output_width cells in blocks of
 * BLOCKSIZE_x x BLOCKSIZE_y x BLOCKSIZE_z threads. The launch status and the
 * result of the device synchronization are both passed to gpuErrchk.
 */
void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // Make computation
    dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);

    // `padding` is passed as a value (a type name is not allowed in a call)
    make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride, padding);
    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
}
|
||||
#endif
|
||||
|
||||
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
|
||||
// input[output_depth][output_width+size-1][output_width+size-1]
|
||||
// output[output_depth][output_width][output_width]
|
||||
int max_move = size - padding;
|
||||
float m;
|
||||
for (int i=0; i < output_depth; i++) {
|
||||
for (int j=0; j < output_width; j++) {
|
||||
for (int k=0; k < output_width; k++) {
|
||||
m = -FLT_MAX;
|
||||
for (int a=0; a < size; a++) {
|
||||
for (int b=0; b < size; b++) {
|
||||
m = fmaxf(m, input[i][stride*j +a][stride*k +b]);
|
||||
for (int a=-padding; a < max_move; a++) {
|
||||
for (int b=-padding; b < max_move; b++) {
|
||||
int j_2 = stride*j +a;
|
||||
int k_2 = stride*k +b;
|
||||
if (pooling_not_outside(j_2, k_2, 0, output_width)) {
|
||||
m = fmaxf(m, input[i][j_2][k_2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
output[i][j][k] = m;
|
||||
@ -142,11 +165,11 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
/*
 * Max pooling entry point.
 *
 * Forwards all arguments unchanged to make_max_pooling_cpu when the file is
 * built by a regular C compiler, and to make_max_pooling_device when it is
 * built by nvcc (__CUDACC__ defined).
 */
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    #ifndef __CUDACC__
    make_max_pooling_cpu(input, output, size, output_depth, output_width, stride, padding);
    #else
    make_max_pooling_device(input, output, size, output_depth, output_width, stride, padding);
    #endif
}
|
||||
|
||||
|
@ -10,59 +10,72 @@
|
||||
|
||||
#include "include/config.h"
|
||||
|
||||
/*
 * Returns 1 when both coordinates x and y lie in [lower_bound, upper_bound),
 * and 0 otherwise. The pooling routines use it to skip window positions
 * that fall outside the array being read.
 *
 * It is called from the CUDA pooling kernels as well as from the CPU loops,
 * so under nvcc it has to be compiled for both the host and the device.
 */
#ifdef __CUDACC__
__host__ __device__
#endif
int pooling_not_outside(int x, int y, int lower_bound, int upper_bound) {
    return !(x < lower_bound || y < lower_bound || x >= upper_bound || y >= upper_bound);
}
|
||||
|
||||
/*
|
||||
* Average Pooling
|
||||
*/
|
||||
#ifdef __CUDACC__
|
||||
/*
 * CUDA kernel for average pooling.
 *
 * Each thread computes one cell output[idx][idy][idz] as the mean of the
 * input values covered by its size x size window. The window starts
 * `padding` cells before stride*idy / stride*idz; positions that fall
 * outside the input are ignored and do not count in the mean.
 *
 * size:         side length of the pooling window
 * output_depth: number of channels (first index bound of output)
 * output_width: side length of each output channel
 * stride:       step between two consecutive windows
 * padding:      number of virtual cells around the input
 */
__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // Respective equivalents of i, j and k in the loop run by the CPU version
    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width
    int max_move = size - padding;
    // The window indexes `input`, so it must be clipped against the input
    // width. It is recovered from the relation the caller uses:
    // size = 2*padding + input_width + stride - output_width*stride
    int input_width = output_width*stride - 2*padding + size - stride;

    if (idx >= output_depth || idy >= output_width || idz >= output_width) {
        return;
    }

    int nb_elements = 0;
    float sum = 0;

    for (int a=-padding; a < max_move; a++) {
        for (int b=-padding; b < max_move; b++) {
            int idy_2 = stride*idy +a;
            int idz_2 = stride*idz +b;
            if (pooling_not_outside(idy_2, idz_2, 0, input_width)) {
                sum += input[idx][idy_2][idz_2];
                nb_elements++; // count only the cells actually summed
            }
        }
    }
    output[idx][idy][idz] = sum/(float)nb_elements;
}
|
||||
|
||||
/*
 * Launches make_average_pooling_kernel with one thread per output cell.
 *
 * The grid is sized with i_div_up so that it covers
 * output_depth x output_width x output_width cells in blocks of
 * BLOCKSIZE_x x BLOCKSIZE_y x BLOCKSIZE_z threads. The launch status and the
 * result of the device synchronization are both passed to gpuErrchk.
 */
void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // Make computation
    dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);

    make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride, padding);
    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
}
|
||||
#endif
|
||||
|
||||
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
|
||||
// input[output_depth][output_width+size-1][output_width+size-1]
|
||||
// output[output_depth][output_width][output_width]
|
||||
float sum;
|
||||
int n = size*size;
|
||||
int max_move = size - padding;
|
||||
|
||||
for (int i=0; i < output_depth; i++) {
|
||||
for (int j=0; j < output_width; j++) {
|
||||
for (int k=0; k < output_width; k++) {
|
||||
sum = 0;
|
||||
for (int a=0; a < size; a++) {
|
||||
for (int b=0; b < size; b++) {
|
||||
sum += input[i][stride*j +a][stride*k +b];
|
||||
float sum = 0.;
|
||||
int nb_elements = 0;
|
||||
for (int a=-padding; a < max_move; a++) {
|
||||
for (int b=-padding; b < max_move; b++) {
|
||||
int j_2 = stride*j +a;
|
||||
int k_2 = stride*k +b;
|
||||
if (pooling_not_outside(j_2, k_2, 0, output_width)) {
|
||||
sum += input[i][j_2][k_2];
|
||||
nb_elements++;
|
||||
}
|
||||
}
|
||||
output[i][j][k] = sum/(float)n;
|
||||
}
|
||||
output[i][j][k] = sum/(float)nb_elements;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -71,11 +84,11 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
/*
 * Average pooling entry point.
 *
 * Forwards all arguments unchanged to make_average_pooling_cpu when the file
 * is built by a regular C compiler, and to make_average_pooling_device when
 * it is built by nvcc (__CUDACC__ defined).
 */
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    #ifndef __CUDACC__
    make_average_pooling_cpu(input, output, size, output_depth, output_width, stride, padding);
    #else
    make_average_pooling_device(input, output, size, output_depth, output_width, stride, padding);
    #endif
}
|
||||
|
||||
@ -87,7 +100,7 @@ void make_average_pooling(float*** input, float*** output, int size, int output_
|
||||
* Max Pooling
|
||||
*/
|
||||
#ifdef __CUDACC__
|
||||
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
|
||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
|
||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
|
||||
@ -97,40 +110,50 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
|
||||
return;
|
||||
}
|
||||
|
||||
int max_move = size - padding;
|
||||
float m = -FLT_MAX;
|
||||
float temp;
|
||||
|
||||
for (int a=0; a < size; a++) {
|
||||
for (int b=0; b < size; b++) {
|
||||
temp = input[idx][stride*idy +a][stride*idz +b];
|
||||
for (int a=-padding; a < max_move; a++) {
|
||||
for (int b=-padding; b < max_move; b++) {
|
||||
int idy_2 = stride*idy +a;
|
||||
int idz_2 = stride*idz +b;
|
||||
if (pooling_not_outside(idy_2, idz_2, 0, output_width)) {
|
||||
temp = input[idx][idy_2][idz_2];
|
||||
m = m > temp ? m : temp; // max(m, temp)
|
||||
}
|
||||
}
|
||||
}
|
||||
output[idx][idy][idz] = m;
|
||||
}
|
||||
|
||||
/*
 * Launches make_max_pooling_kernel with one thread per output cell.
 *
 * The grid is sized with i_div_up so that it covers
 * output_depth x output_width x output_width cells in blocks of
 * BLOCKSIZE_x x BLOCKSIZE_y x BLOCKSIZE_z threads. The launch status and the
 * result of the device synchronization are both passed to gpuErrchk.
 */
void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    // Make computation
    dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);

    // `padding` is passed as a value (a type name is not allowed in a call)
    make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride, padding);
    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
}
|
||||
#endif
|
||||
|
||||
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
|
||||
// input[output_depth][output_width+size-1][output_width+size-1]
|
||||
// output[output_depth][output_width][output_width]
|
||||
int max_move = size - padding;
|
||||
float m;
|
||||
for (int i=0; i < output_depth; i++) {
|
||||
for (int j=0; j < output_width; j++) {
|
||||
for (int k=0; k < output_width; k++) {
|
||||
m = -FLT_MAX;
|
||||
for (int a=0; a < size; a++) {
|
||||
for (int b=0; b < size; b++) {
|
||||
m = fmaxf(m, input[i][stride*j +a][stride*k +b]);
|
||||
for (int a=-padding; a < max_move; a++) {
|
||||
for (int b=-padding; b < max_move; b++) {
|
||||
int j_2 = stride*j +a;
|
||||
int k_2 = stride*k +b;
|
||||
if (pooling_not_outside(j_2, k_2, 0, output_width)) {
|
||||
m = fmaxf(m, input[i][j_2][k_2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
output[i][j][k] = m;
|
||||
@ -142,11 +165,11 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
/*
 * Max pooling entry point.
 *
 * Forwards all arguments unchanged to make_max_pooling_cpu when the file is
 * built by a regular C compiler, and to make_max_pooling_device when it is
 * built by nvcc (__CUDACC__ defined).
 */
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
    #ifndef __CUDACC__
    make_max_pooling_cpu(input, output, size, output_depth, output_width, stride, padding);
    #else
    make_max_pooling_device(input, output, size, output_depth, output_width, stride, padding);
    #endif
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user