diff --git a/src/cnn/cnn.c b/src/cnn/cnn.c index c05eb10..6deba62 100644 --- a/src/cnn/cnn.c +++ b/src/cnn/cnn.c @@ -189,7 +189,7 @@ void forward_propagation(Network* network) { * On copie les valeurs de output dans output_z, puis on applique la fonction d'activation à output_z */ if (k_i->cnn) { // Convolution - make_convolution(k_i->cnn, input, output, output_width); + make_convolution(k_i->cnn, input, output, output_width, 1); copy_3d_array(output, output_z, output_depth, output_width, output_width); apply_function_to_matrix(activation, output, output_depth, output_width); } diff --git a/src/cnn/convolution.c b/src/cnn/convolution.c index 2e89074..4219703 100644 --- a/src/cnn/convolution.c +++ b/src/cnn/convolution.c @@ -5,24 +5,27 @@ #include "include/struct.h" #include "../include/utils.h" - #include "include/config.h" -void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) { +void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) { // c'est le kernel de input // input[kernel->rows][kernel_k_size + output_dim-1][kernel_k_size + output_dim-1] // output[kernel->columns][output_dim][output_dim] + + int k_size = kernel->k_size; + int k_columns = kernel->columns; + int k_rows = kernel->rows; float f; - for (int i=0; i < kernel->columns; i++) { // filtre + for (int i=0; i < k_columns; i++) { // filtre for (int j=0; j < output_dim; j++) { // ligne de sortie for (int k=0; k < output_dim; k++) { // colonne de sortie f = kernel->bias[i][j][k]; - for (int a=0; a < kernel->rows; a++) { // Canal de couleur - for (int b=0; b < kernel->k_size; b++) { // ligne du filtre - for (int c=0; c < kernel->k_size; c++) { // colonne du filtre - f += kernel->weights[a][i][b][c]*input[a][j+b][k+c]; + for (int a=0; a < k_rows; a++) { // Canal de couleur + for (int b=0; b < k_size; b++) { // ligne du filtre + for (int c=0; c < k_size; c++) { // colonne du filtre + f += kernel->weights[a][i][b][c]*input[a][stride*j+b][stride*k+c]; } } } @@ -34,7 +37,7 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i #ifdef __CUDACC__ -__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) { +__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) { // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size) @@ -49,7 +52,7 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa for (int a=0; a < kernel->rows; a++) { for (int b=0; b < kernel->k_size; b++) { for (int c=0; c < kernel->k_size; c++) { - f += kernel->weights[a][idx][b][c]*input[a][idy+b][idz+c]; + f += kernel->weights[a][idx][b][c]*input[a][idy*stride+b][idz*stride+c]; } } } @@ -57,21 +60,21 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa output[idx][idy][idz] = f; } -void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) { +void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) { // Make computation dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z)); dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); - make_convolution_kernel<<>>(kernel, input, output, output_dim); + make_convolution_kernel<<>>(kernel, input, output, output_dim, stride); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); } #endif -void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) { +void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) { #ifndef __CUDACC__ - make_convolution_cpu(kernel, input, output, output_dim); + make_convolution_cpu(kernel, input, output, output_dim, stride); #else - make_convolution_device(kernel, input, output, output_dim); + make_convolution_device(kernel, input, output, output_dim, stride); #endif } \ No newline at end of file diff --git a/src/cnn/convolution.cu b/src/cnn/convolution.cu index 847ad15..4219703 100644 --- a/src/cnn/convolution.cu +++ b/src/cnn/convolution.cu @@ -8,20 +8,24 @@ #include "include/config.h" -void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) { +void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) { // c'est le kernel de input // input[kernel->rows][kernel_k_size + output_dim-1][kernel_k_size + output_dim-1] // output[kernel->columns][output_dim][output_dim] + + int k_size = kernel->k_size; + int k_columns = kernel->columns; + int k_rows = kernel->rows; float f; - for (int i=0; i < kernel->columns; i++) { // filtre + for (int i=0; i < k_columns; i++) { // filtre for (int j=0; j < output_dim; j++) { // ligne de sortie for (int k=0; k < output_dim; k++) { // colonne de sortie f = kernel->bias[i][j][k]; - for (int a=0; a < kernel->rows; a++) { // Canal de couleur - for (int b=0; b < kernel->k_size; b++) { // ligne du filtre - for (int c=0; c < kernel->k_size; c++) { // colonne du filtre - f += kernel->weights[a][i][b][c]*input[a][j+b][k+c]; + for (int a=0; a < k_rows; a++) { // Canal de couleur + for (int b=0; b < k_size; b++) { // ligne du filtre + for (int c=0; c < k_size; c++) { // colonne du filtre + f += kernel->weights[a][i][b][c]*input[a][stride*j+b][stride*k+c]; } } } @@ -33,7 +37,7 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i #ifdef __CUDACC__ -__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) { +__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) { // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size) @@ -48,7 +52,7 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa for (int a=0; a < kernel->rows; a++) { for (int b=0; b < kernel->k_size; b++) { for (int c=0; c < kernel->k_size; c++) { - f += kernel->weights[a][idx][b][c]*input[a][idy+b][idz+c]; + f += kernel->weights[a][idx][b][c]*input[a][idy*stride+b][idz*stride+c]; } } } @@ -56,22 +60,21 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa output[idx][idy][idz] = f; } -void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) { +void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) { // Make computation dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z)); dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); - make_convolution_kernel<<>>(kernel, input, output, output_dim); + make_convolution_kernel<<>>(kernel, input, output, output_dim, stride); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); } #endif -extern "C" -void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) { +void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) { #ifndef __CUDACC__ - make_convolution_cpu(kernel, input, output, output_dim); + make_convolution_cpu(kernel, input, output, output_dim, stride); #else - make_convolution_device(kernel, input, output, output_dim); + make_convolution_device(kernel, input, output, output_dim, stride); #endif } \ No newline at end of file diff --git a/src/cnn/include/convolution.h b/src/cnn/include/convolution.h index 2f599b1..09cc0e9 100644 --- a/src/cnn/include/convolution.h +++ b/src/cnn/include/convolution.h @@ -3,21 +3,21 @@ /* * Effectue la convolution naïvement sur le processeur */ -void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim); +void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride); #ifdef __CUDACC__ /* * Kernel de la convolution sur carte graphique */ -__global__ void make_convolution_kernel(int k_size, int columns, int rows, float* bias, size_t pitch_bias, float**** weights, size_t pitch_weights, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_dim); +__global__ void make_convolution_kernel(int k_size, int columns, int rows, float* bias, size_t pitch_bias, float**** weights, size_t pitch_weights, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_dim, int stride); /* * Effectue la convolution naïvement sur la carte graphique */ -void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim); +void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride); #endif /* * Détermine si la convolution peut-être faite sur la carte graphique au moment de la compilation */ -void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim); \ No newline at end of file +void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride); \ No newline at end of file diff --git a/src/cnn/include/make.h b/src/cnn/include/make.h index a1fe694..866b2d3 100644 --- a/src/cnn/include/make.h +++ b/src/cnn/include/make.h @@ -6,12 +6,12 @@ /* * Effectue une convolution sans stride sur le processeur */ -void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim); +void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride); /* * Effectue la convolution sur le CPU ou GPU */ -void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim); +void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride); #ifdef __CUDACC__ extern "C" diff --git a/src/scripts/convolution_benchmark.cu b/src/scripts/convolution_benchmark.cu index 4650cad..78b8ae3 100644 --- a/src/scripts/convolution_benchmark.cu +++ b/src/scripts/convolution_benchmark.cu @@ -157,7 +157,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns) double cpu_time_used, gpu_time_used; start = clock(); - make_convolution_device(kernel, input, output_gpu, output_dim); + make_convolution_device(kernel, input, output_gpu, output_dim, 1); end = clock(); gpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; @@ -165,7 +165,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns) start = clock(); - make_convolution_cpu(kernel, input, output_cpu, output_dim); + make_convolution_cpu(kernel, input, output_cpu, output_dim, 1); end = clock(); cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; diff --git a/test/cnn_convolution.cu b/test/cnn_convolution.cu index 9716e62..c44ef7d 100644 --- a/test/cnn_convolution.cu +++ b/test/cnn_convolution.cu @@ -140,7 +140,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns) double cpu_time_used, gpu_time_used; start_time = omp_get_wtime(); - make_convolution_device(kernel, input, output_gpu, output_dim); + make_convolution_device(kernel, input, output_gpu, output_dim, 1); end_time = omp_get_wtime(); @@ -149,7 +149,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns) start_time = omp_get_wtime(); - make_convolution_cpu(kernel, input, output_cpu, output_dim); + make_convolution_cpu(kernel, input, output_cpu, output_dim, 1); end_time = omp_get_wtime(); cpu_time_used = end_time - start_time;