From c0808b9240f10de96a019efb2a0c17492aa90d0c Mon Sep 17 00:00:00 2001
From: julienChemillier
Date: Sat, 13 May 2023 17:22:47 +0200
Subject: [PATCH] Change notation toward a consensus

---
 README.md                            |   4 +-
 src/cnn/backpropagation.c            | 126 +++++++++++++--------------
 src/cnn/backpropagation.cu           | 126 +++++++++++++--------------
 src/cnn/convolution.c                |  38 ++++----
 src/cnn/convolution.cu               |  38 ++++----
 src/cnn/creation.c                   |  84 +++++++++---------
 src/cnn/include/backpropagation.h    |   4 +-
 src/cnn/include/convolution.h        |   8 +-
 src/cnn/include/creation.h           |   8 +-
 src/cnn/include/make.h               |  10 +--
 src/cnn/include/neuron_io.h          |   2 +-
 src/cnn/include/print.h              |   2 +-
 src/cnn/include/struct.h             |  10 +--
 src/cnn/make.c                       |  48 +++++-----
 src/cnn/make.cu                      |  48 +++++-----
 src/cnn/neuron_io.c                  |  30 +++----
 src/cnn/print.c                      |  14 +--
 src/cnn/train.c                      |  10 +--
 src/cnn/utils.c                      |  48 +++++-----
 src/scripts/benchmark_mul.py         |   6 +-
 src/scripts/convolution_benchmark.cu |  28 +++---
 test/cnn_convolution.cu              |  50 +++++------
 22 files changed, 371 insertions(+), 371 deletions(-)

diff --git a/README.md b/README.md
index e12dc71..6eb6a99 100644
--- a/README.md
+++ b/README.md
@@ -173,8 +173,8 @@ Résultats pour un réseau assez conséquent, avec des images de 256x256 pixels:
```c -Network* create_large_network(float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth) { - Network* network = create_network(16, learning_rate, dropout, activation, initialisation, input_dim, input_depth); +Network* create_large_network(float learning_rate, int dropout, int activation, int initialisation, int input_width, int input_depth) { + Network* network = create_network(16, learning_rate, dropout, activation, initialisation, input_width, input_depth); add_convolution(network, 6, 258, activation); add_convolution(network, 16, 256, activation); add_average_pooling(network, 64); diff --git a/src/cnn/backpropagation.c b/src/cnn/backpropagation.c index 397720a..a9d9277 100644 --- a/src/cnn/backpropagation.c +++ b/src/cnn/backpropagation.c @@ -358,16 +358,16 @@ void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, * Backward linearisation */ #ifdef __CUDACC__ -__global__ void backward_linearisation_kernel_1(Kernel_nn* ker, float*** input, float* output, int depth_input, int dim_input, int size_output) { - int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth_input - int idy = threadIdx.y + blockDim.y*blockIdx.y; // < dim_input - int idz = threadIdx.z + blockDim.z*blockIdx.z; // < dim_input +__global__ void backward_linearisation_kernel_1(Kernel_nn* ker, float*** input, float* output, int input_depth, int input_width, int size_output) { + int idx = threadIdx.x + blockDim.x*blockIdx.x; // < input_depth + int idy = threadIdx.y + blockDim.y*blockIdx.y; // < input_width + int idz = threadIdx.z + blockDim.z*blockIdx.z; // < input_width - if (idx >= depth_input || idy >= dim_input || idz >= dim_input) { + if (idx >= input_depth || idy >= input_width || idz >= input_width) { return; } - int id = idx*dim_input*dim_input + idy*dim_input + idz; + int id = idx*input_width*input_width + idy*input_width + idz; for (int j=0; j < size_output; j++) { ker->d_weights[id][j] += input[idx][idy][idz]*output[j]; @@ -379,15 +379,15 @@ __global__ void backward_linearisation_kernel_1(Kernel_nn* ker, float*** input, } } -__global__ void backward_linearisation_kernel_2(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, funcPtr d_f) { - int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth_input - int idy = threadIdx.y + blockDim.y*blockIdx.y; // < dim_input - int idz = threadIdx.z + blockDim.z*blockIdx.z; // < dim_input +__global__ void backward_linearisation_kernel_2(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, funcPtr d_f) { + int idx = threadIdx.x + blockDim.x*blockIdx.x; // < input_depth + int idy = threadIdx.y + blockDim.y*blockIdx.y; // < input_width + int idz = threadIdx.z + blockDim.z*blockIdx.z; // < input_width - if (idx >= depth_input || idy >= dim_input || idz >= dim_input) { + if (idx >= input_depth || idy >= input_width || idz >= input_width) { return; } - int id = idx*dim_input*dim_input + idy*dim_input + idz; + int id = idx*input_width*input_width + idy*input_width + idz; float tmp=0; for (int j=0; j < size_output; j++) { @@ -396,12 +396,12 @@ __global__ void backward_linearisation_kernel_2(Kernel_nn* ker, float*** input, input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) ); } -void backward_linearisation_device(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, int activation) { +void 
backward_linearisation_device(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, int activation) { // Make computation - dim3 gridSize(i_div_up(depth_input, BLOCKSIZE_x), i_div_up(dim_input, BLOCKSIZE_y), i_div_up(dim_input, BLOCKSIZE_y)); + dim3 gridSize(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(input_width, BLOCKSIZE_y), i_div_up(input_width, BLOCKSIZE_y)); dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); - backward_linearisation_kernel_1<<>>(ker, input, output, depth_input, dim_input, size_output); + backward_linearisation_kernel_1<<>>(ker, input, output, input_depth, input_width, size_output); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); @@ -409,14 +409,14 @@ void backward_linearisation_device(Kernel_nn* ker, float*** input, float*** inpu // Second kernel funcPtr d_function = get_activation_function_cuda(activation); - backward_linearisation_kernel_2<<>>(ker, input, input_z, output, depth_input, dim_input, size_output, d_function); + backward_linearisation_kernel_2<<>>(ker, input, input_z, output, input_depth, input_width, size_output, d_function); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); } #endif -void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, int activation) { +void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, int activation) { funcPtr d_function = get_activation_function(activation); @@ -427,9 +427,9 @@ void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z // Weights int cpt = 0; - for (int i=0; i < depth_input; i++) { - for (int k=0; k < dim_input; k++) { - for (int l=0; l < dim_input; l++) { + for (int i=0; i < input_depth; i++) { + for (int k=0; k < input_width; k++) { + for (int l=0; l < input_width; l++) { for (int j=0; j < size_output; j++) { ker->d_weights[cpt][j] += input[i][k][l]*output[j]; } @@ -440,9 +440,9 @@ void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z // Input cpt = 0; - for (int i=0; i < depth_input; i++) { - for (int k=0; k < dim_input; k++) { - for (int l=0; l < dim_input; l++) { + for (int i=0; i < input_depth; i++) { + for (int k=0; k < input_width; k++) { + for (int l=0; l < input_width; l++) { float tmp=0; for (int j=0; j < size_output; j++) { tmp += output[j]*ker->weights[cpt][j]; @@ -457,11 +457,11 @@ void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z #ifdef __CUDACC__ extern "C" #endif -void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, int activation) { +void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, int activation) { #ifndef __CUDACC__ - backward_linearisation_cpu(ker, input, input_z, output, depth_input, dim_input, size_output, activation); + backward_linearisation_cpu(ker, input, input_z, output, input_depth, input_width, size_output, activation); #else - backward_linearisation_device(ker, input, input_z, output, depth_input, dim_input, size_output, activation); + backward_linearisation_device(ker, input, input_z, output, input_depth, input_width, size_output, activation); #endif } @@ -469,18 +469,18 @@ void backward_linearisation(Kernel_nn* ker, 
float*** input, float*** input_z, fl * Backward convolution */ #ifdef __CUDACC__ -__global__ void backward_convolution_dbias_kernel(Kernel_cnn* ker, float*** output, int depth_output, int dim_output) { +__global__ void backward_convolution_dbias_kernel(Kernel_cnn* ker, float*** output, int output_depth, int output_width) { int idx = threadIdx.x + blockDim.x*blockIdx.x; int idy = threadIdx.y + blockDim.y*blockIdx.y; int idz = threadIdx.z + blockDim.z*blockIdx.z; - if (idx >= depth_output || idy >= dim_output || idz >= dim_output) { + if (idx >= output_depth || idy >= output_width || idz >= output_width) { return; } ker->d_bias[idx][idy][idz] += output[idx][idy][idz]; } -__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int depth_input, int depth_output, int dim_output, int k_size) { +__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int k_size) { int idx = threadIdx.x + blockDim.x*blockIdx.x; int idy = threadIdx.y + blockDim.y*blockIdx.y; int idz = threadIdx.z + blockDim.z*blockIdx.z; @@ -488,35 +488,35 @@ __global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** in int idz1 = idz / k_size; int idz2 = idz % k_size; - if (idx >= depth_input || idy >= depth_output || idz1 >= k_size || idz2 >= k_size) { + if (idx >= input_depth || idy >= output_depth || idz1 >= k_size || idz2 >= k_size) { return; } float tmp = 0; - for (int l=0; l < dim_output; l++) { - for (int m=0; m < dim_output; m++) { + for (int l=0; l < output_width; l++) { + for (int m=0; m < output_width; m++) { tmp += input[idx][l+idz1][m+idz2]*output[idy][l][m]; } } ker->d_weights[idx][idy][idz1][idz2] += tmp; } -__global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int k_size, funcPtr d_f) { +__global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int k_size, funcPtr d_f) { int idx = threadIdx.x + blockDim.x*blockIdx.x; int idy = threadIdx.y + blockDim.y*blockIdx.y; int idz = threadIdx.z + blockDim.z*blockIdx.z; - if (idx >= depth_input || idy >= dim_input || idz >= dim_input) { + if (idx >= input_depth || idy >= input_width || idz >= input_width) { return; } int min_m, max_m, min_n, max_n; float tmp = 0; - for (int l=0; l < depth_output; l++) { + for (int l=0; l < output_depth; l++) { min_m = max(0, k_size-1-idy); - max_m = min(k_size, dim_input - idy); + max_m = min(k_size, input_width - idy); min_n = max(0, k_size-1-idz); - max_n = min(k_size, dim_input-idz); + max_n = min(k_size, input_width-idz); for (int m=min_m; m < max_m; m++) { for (int n=min_n; n < max_n; n++) { tmp += output[l][idy-k_size+m+1][idz-k_size+n+1]*ker->weights[idx][l][m][n]; @@ -526,35 +526,35 @@ __global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float*** input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) ); } -void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, int activation, int is_first) { +void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) { // Bias Kernel - 
dim3 gridSize1(i_div_up(depth_output, BLOCKSIZE_x), i_div_up(dim_output, BLOCKSIZE_y), i_div_up(dim_output, BLOCKSIZE_y)); + dim3 gridSize1(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_y)); dim3 blockSize1(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); - backward_convolution_dbias_kernel<<>>(ker, output, depth_output, dim_output); + backward_convolution_dbias_kernel<<>>(ker, output, output_depth, output_width); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); // Weights Kernel - int k_size = dim_input - dim_output +1; + int k_size = input_width - output_width +1; - dim3 gridSize2(i_div_up(depth_input, BLOCKSIZE_x), i_div_up(depth_output, BLOCKSIZE_y), i_div_up(k_size*k_size, BLOCKSIZE_y)); + dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(k_size*k_size, BLOCKSIZE_y)); dim3 blockSize2(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); - backward_convolution_dweight_kernel<<>>(ker, input, output, depth_input, depth_output, dim_output, k_size); + backward_convolution_dweight_kernel<<>>(ker, input, output, input_depth, output_depth, output_width, k_size); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); // input propagation Kernel if (is_first != 1) { - dim3 gridSize3(i_div_up(depth_input, BLOCKSIZE_x), i_div_up(dim_input, BLOCKSIZE_y), i_div_up(dim_input, BLOCKSIZE_y)); + dim3 gridSize3(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(input_width, BLOCKSIZE_y), i_div_up(input_width, BLOCKSIZE_y)); dim3 blockSize3(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); funcPtr d_function = get_activation_function_cuda(activation); - backward_convolution_propagate_kernel<<>>(ker, input, input_z, output, depth_input, dim_input, depth_output, k_size, d_function); + backward_convolution_propagate_kernel<<>>(ker, input, input_z, output, input_depth, input_width, output_depth, k_size, d_function); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); @@ -563,29 +563,29 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input #endif -void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, int activation, int is_first) { +void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) { funcPtr d_function = get_activation_function(activation); // Bias - for (int i=0; i < depth_output; i++) { - for (int j=0; j < dim_output; j++) { - for (int k=0; k < dim_output; k++) { + for (int i=0; i < output_depth; i++) { + for (int j=0; j < output_width; j++) { + for (int k=0; k < output_width; k++) { ker->d_bias[i][j][k] += output[i][j][k]; } } } // Weights - int k_size = dim_input - dim_output +1; + int k_size = input_width - output_width +1; - for (int h=0; h < depth_input; h++) { - for (int i=0; i < depth_output; i++) { + for (int h=0; h < input_depth; h++) { + for (int i=0; i < output_depth; i++) { for (int j=0; j < k_size; j++) { for (int k=0; k < k_size; k++) { float tmp = 0; - for (int l=0; l < dim_output; l++) { - for (int m=0; m < dim_output; m++) { + for (int l=0; l < output_width; l++) { + for (int m=0; m < output_width; m++) { tmp += input[h][l+j][m+k]*output[i][l][m]; } } @@ -599,15 +599,15 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, if (is_first==1) // Pas 
besoin de backpropager dans l'input return; int min_m, max_m, min_n, max_n; - for (int i=0; i < depth_input; i++) { - for (int j=0; j < dim_input; j++) { - for (int k=0; k < dim_input; k++) { + for (int i=0; i < input_depth; i++) { + for (int j=0; j < input_width; j++) { + for (int k=0; k < input_width; k++) { float tmp = 0; - for (int l=0; l < depth_output; l++) { + for (int l=0; l < output_depth; l++) { min_m = max(0, k_size-1-j); - max_m = min(k_size, dim_input - j); + max_m = min(k_size, input_width - j); min_n = max(0, k_size-1-k); - max_n = min(k_size, dim_input-k); + max_n = min(k_size, input_width-k); for (int m=min_m; m < max_m; m++) { for (int n=min_n; n < max_n; n++) { tmp += output[l][j-k_size+m+1][k-k_size+n+1]*ker->weights[i][l][m][n]; @@ -623,10 +623,10 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, #ifdef __CUDACC__ extern "C" #endif -void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, int activation, int is_first) { +void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) { #ifndef __CUDACC__ - backward_convolution_cpu(ker, input, input_z, output, depth_input, dim_input, depth_output, dim_output, activation, is_first); + backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first); #else - backward_convolution_device(ker, input, input_z, output, depth_input, dim_input, depth_output, dim_output, activation, is_first); + backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first); #endif } \ No newline at end of file diff --git a/src/cnn/backpropagation.cu b/src/cnn/backpropagation.cu index 397720a..a9d9277 100644 --- a/src/cnn/backpropagation.cu +++ b/src/cnn/backpropagation.cu @@ -358,16 +358,16 @@ void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output, * Backward linearisation */ #ifdef __CUDACC__ -__global__ void backward_linearisation_kernel_1(Kernel_nn* ker, float*** input, float* output, int depth_input, int dim_input, int size_output) { - int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth_input - int idy = threadIdx.y + blockDim.y*blockIdx.y; // < dim_input - int idz = threadIdx.z + blockDim.z*blockIdx.z; // < dim_input +__global__ void backward_linearisation_kernel_1(Kernel_nn* ker, float*** input, float* output, int input_depth, int input_width, int size_output) { + int idx = threadIdx.x + blockDim.x*blockIdx.x; // < input_depth + int idy = threadIdx.y + blockDim.y*blockIdx.y; // < input_width + int idz = threadIdx.z + blockDim.z*blockIdx.z; // < input_width - if (idx >= depth_input || idy >= dim_input || idz >= dim_input) { + if (idx >= input_depth || idy >= input_width || idz >= input_width) { return; } - int id = idx*dim_input*dim_input + idy*dim_input + idz; + int id = idx*input_width*input_width + idy*input_width + idz; for (int j=0; j < size_output; j++) { ker->d_weights[id][j] += input[idx][idy][idz]*output[j]; @@ -379,15 +379,15 @@ __global__ void backward_linearisation_kernel_1(Kernel_nn* ker, float*** input, } } -__global__ void backward_linearisation_kernel_2(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, funcPtr d_f) { - int idx = 
threadIdx.x + blockDim.x*blockIdx.x; // < depth_input - int idy = threadIdx.y + blockDim.y*blockIdx.y; // < dim_input - int idz = threadIdx.z + blockDim.z*blockIdx.z; // < dim_input +__global__ void backward_linearisation_kernel_2(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, funcPtr d_f) { + int idx = threadIdx.x + blockDim.x*blockIdx.x; // < input_depth + int idy = threadIdx.y + blockDim.y*blockIdx.y; // < input_width + int idz = threadIdx.z + blockDim.z*blockIdx.z; // < input_width - if (idx >= depth_input || idy >= dim_input || idz >= dim_input) { + if (idx >= input_depth || idy >= input_width || idz >= input_width) { return; } - int id = idx*dim_input*dim_input + idy*dim_input + idz; + int id = idx*input_width*input_width + idy*input_width + idz; float tmp=0; for (int j=0; j < size_output; j++) { @@ -396,12 +396,12 @@ __global__ void backward_linearisation_kernel_2(Kernel_nn* ker, float*** input, input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) ); } -void backward_linearisation_device(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, int activation) { +void backward_linearisation_device(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, int activation) { // Make computation - dim3 gridSize(i_div_up(depth_input, BLOCKSIZE_x), i_div_up(dim_input, BLOCKSIZE_y), i_div_up(dim_input, BLOCKSIZE_y)); + dim3 gridSize(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(input_width, BLOCKSIZE_y), i_div_up(input_width, BLOCKSIZE_y)); dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); - backward_linearisation_kernel_1<<>>(ker, input, output, depth_input, dim_input, size_output); + backward_linearisation_kernel_1<<>>(ker, input, output, input_depth, input_width, size_output); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); @@ -409,14 +409,14 @@ void backward_linearisation_device(Kernel_nn* ker, float*** input, float*** inpu // Second kernel funcPtr d_function = get_activation_function_cuda(activation); - backward_linearisation_kernel_2<<>>(ker, input, input_z, output, depth_input, dim_input, size_output, d_function); + backward_linearisation_kernel_2<<>>(ker, input, input_z, output, input_depth, input_width, size_output, d_function); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); } #endif -void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, int activation) { +void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, int activation) { funcPtr d_function = get_activation_function(activation); @@ -427,9 +427,9 @@ void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z // Weights int cpt = 0; - for (int i=0; i < depth_input; i++) { - for (int k=0; k < dim_input; k++) { - for (int l=0; l < dim_input; l++) { + for (int i=0; i < input_depth; i++) { + for (int k=0; k < input_width; k++) { + for (int l=0; l < input_width; l++) { for (int j=0; j < size_output; j++) { ker->d_weights[cpt][j] += input[i][k][l]*output[j]; } @@ -440,9 +440,9 @@ void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z // Input cpt = 0; - for (int i=0; i < depth_input; i++) { - for (int k=0; k < dim_input; k++) { - for (int l=0; l < dim_input; l++) 
{ + for (int i=0; i < input_depth; i++) { + for (int k=0; k < input_width; k++) { + for (int l=0; l < input_width; l++) { float tmp=0; for (int j=0; j < size_output; j++) { tmp += output[j]*ker->weights[cpt][j]; @@ -457,11 +457,11 @@ void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z #ifdef __CUDACC__ extern "C" #endif -void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, int activation) { +void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, int activation) { #ifndef __CUDACC__ - backward_linearisation_cpu(ker, input, input_z, output, depth_input, dim_input, size_output, activation); + backward_linearisation_cpu(ker, input, input_z, output, input_depth, input_width, size_output, activation); #else - backward_linearisation_device(ker, input, input_z, output, depth_input, dim_input, size_output, activation); + backward_linearisation_device(ker, input, input_z, output, input_depth, input_width, size_output, activation); #endif } @@ -469,18 +469,18 @@ void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, fl * Backward convolution */ #ifdef __CUDACC__ -__global__ void backward_convolution_dbias_kernel(Kernel_cnn* ker, float*** output, int depth_output, int dim_output) { +__global__ void backward_convolution_dbias_kernel(Kernel_cnn* ker, float*** output, int output_depth, int output_width) { int idx = threadIdx.x + blockDim.x*blockIdx.x; int idy = threadIdx.y + blockDim.y*blockIdx.y; int idz = threadIdx.z + blockDim.z*blockIdx.z; - if (idx >= depth_output || idy >= dim_output || idz >= dim_output) { + if (idx >= output_depth || idy >= output_width || idz >= output_width) { return; } ker->d_bias[idx][idy][idz] += output[idx][idy][idz]; } -__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int depth_input, int depth_output, int dim_output, int k_size) { +__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int k_size) { int idx = threadIdx.x + blockDim.x*blockIdx.x; int idy = threadIdx.y + blockDim.y*blockIdx.y; int idz = threadIdx.z + blockDim.z*blockIdx.z; @@ -488,35 +488,35 @@ __global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** in int idz1 = idz / k_size; int idz2 = idz % k_size; - if (idx >= depth_input || idy >= depth_output || idz1 >= k_size || idz2 >= k_size) { + if (idx >= input_depth || idy >= output_depth || idz1 >= k_size || idz2 >= k_size) { return; } float tmp = 0; - for (int l=0; l < dim_output; l++) { - for (int m=0; m < dim_output; m++) { + for (int l=0; l < output_width; l++) { + for (int m=0; m < output_width; m++) { tmp += input[idx][l+idz1][m+idz2]*output[idy][l][m]; } } ker->d_weights[idx][idy][idz1][idz2] += tmp; } -__global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int k_size, funcPtr d_f) { +__global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int k_size, funcPtr d_f) { int idx = threadIdx.x + blockDim.x*blockIdx.x; int idy = threadIdx.y + blockDim.y*blockIdx.y; int idz = threadIdx.z + blockDim.z*blockIdx.z; - if (idx >= 
depth_input || idy >= dim_input || idz >= dim_input) { + if (idx >= input_depth || idy >= input_width || idz >= input_width) { return; } int min_m, max_m, min_n, max_n; float tmp = 0; - for (int l=0; l < depth_output; l++) { + for (int l=0; l < output_depth; l++) { min_m = max(0, k_size-1-idy); - max_m = min(k_size, dim_input - idy); + max_m = min(k_size, input_width - idy); min_n = max(0, k_size-1-idz); - max_n = min(k_size, dim_input-idz); + max_n = min(k_size, input_width-idz); for (int m=min_m; m < max_m; m++) { for (int n=min_n; n < max_n; n++) { tmp += output[l][idy-k_size+m+1][idz-k_size+n+1]*ker->weights[idx][l][m][n]; @@ -526,35 +526,35 @@ __global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float*** input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) ); } -void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, int activation, int is_first) { +void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) { // Bias Kernel - dim3 gridSize1(i_div_up(depth_output, BLOCKSIZE_x), i_div_up(dim_output, BLOCKSIZE_y), i_div_up(dim_output, BLOCKSIZE_y)); + dim3 gridSize1(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_y)); dim3 blockSize1(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); - backward_convolution_dbias_kernel<<>>(ker, output, depth_output, dim_output); + backward_convolution_dbias_kernel<<>>(ker, output, output_depth, output_width); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); // Weights Kernel - int k_size = dim_input - dim_output +1; + int k_size = input_width - output_width +1; - dim3 gridSize2(i_div_up(depth_input, BLOCKSIZE_x), i_div_up(depth_output, BLOCKSIZE_y), i_div_up(k_size*k_size, BLOCKSIZE_y)); + dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(k_size*k_size, BLOCKSIZE_y)); dim3 blockSize2(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); - backward_convolution_dweight_kernel<<>>(ker, input, output, depth_input, depth_output, dim_output, k_size); + backward_convolution_dweight_kernel<<>>(ker, input, output, input_depth, output_depth, output_width, k_size); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); // input propagation Kernel if (is_first != 1) { - dim3 gridSize3(i_div_up(depth_input, BLOCKSIZE_x), i_div_up(dim_input, BLOCKSIZE_y), i_div_up(dim_input, BLOCKSIZE_y)); + dim3 gridSize3(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(input_width, BLOCKSIZE_y), i_div_up(input_width, BLOCKSIZE_y)); dim3 blockSize3(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); funcPtr d_function = get_activation_function_cuda(activation); - backward_convolution_propagate_kernel<<>>(ker, input, input_z, output, depth_input, dim_input, depth_output, k_size, d_function); + backward_convolution_propagate_kernel<<>>(ker, input, input_z, output, input_depth, input_width, output_depth, k_size, d_function); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); @@ -563,29 +563,29 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input #endif -void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, int activation, int is_first) { +void 
backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) { funcPtr d_function = get_activation_function(activation); // Bias - for (int i=0; i < depth_output; i++) { - for (int j=0; j < dim_output; j++) { - for (int k=0; k < dim_output; k++) { + for (int i=0; i < output_depth; i++) { + for (int j=0; j < output_width; j++) { + for (int k=0; k < output_width; k++) { ker->d_bias[i][j][k] += output[i][j][k]; } } } // Weights - int k_size = dim_input - dim_output +1; + int k_size = input_width - output_width +1; - for (int h=0; h < depth_input; h++) { - for (int i=0; i < depth_output; i++) { + for (int h=0; h < input_depth; h++) { + for (int i=0; i < output_depth; i++) { for (int j=0; j < k_size; j++) { for (int k=0; k < k_size; k++) { float tmp = 0; - for (int l=0; l < dim_output; l++) { - for (int m=0; m < dim_output; m++) { + for (int l=0; l < output_width; l++) { + for (int m=0; m < output_width; m++) { tmp += input[h][l+j][m+k]*output[i][l][m]; } } @@ -599,15 +599,15 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, if (is_first==1) // Pas besoin de backpropager dans l'input return; int min_m, max_m, min_n, max_n; - for (int i=0; i < depth_input; i++) { - for (int j=0; j < dim_input; j++) { - for (int k=0; k < dim_input; k++) { + for (int i=0; i < input_depth; i++) { + for (int j=0; j < input_width; j++) { + for (int k=0; k < input_width; k++) { float tmp = 0; - for (int l=0; l < depth_output; l++) { + for (int l=0; l < output_depth; l++) { min_m = max(0, k_size-1-j); - max_m = min(k_size, dim_input - j); + max_m = min(k_size, input_width - j); min_n = max(0, k_size-1-k); - max_n = min(k_size, dim_input-k); + max_n = min(k_size, input_width-k); for (int m=min_m; m < max_m; m++) { for (int n=min_n; n < max_n; n++) { tmp += output[l][j-k_size+m+1][k-k_size+n+1]*ker->weights[i][l][m][n]; @@ -623,10 +623,10 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, #ifdef __CUDACC__ extern "C" #endif -void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, int activation, int is_first) { +void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) { #ifndef __CUDACC__ - backward_convolution_cpu(ker, input, input_z, output, depth_input, dim_input, depth_output, dim_output, activation, is_first); + backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first); #else - backward_convolution_device(ker, input, input_z, output, depth_input, dim_input, depth_output, dim_output, activation, is_first); + backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first); #endif } \ No newline at end of file diff --git a/src/cnn/convolution.c b/src/cnn/convolution.c index bd1c1ab..653f11a 100644 --- a/src/cnn/convolution.c +++ b/src/cnn/convolution.c @@ -12,27 +12,27 @@ int convolution_not_outside(int x, int y, int lower_bound, int upper_bound) { return !(x < lower_bound || y < lower_bound || x >= upper_bound || y>= upper_bound); } -void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int 
padding) { +void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) { // c'est le kernel de input - // input[kernel->rows][kernel_k_size + output_dim-1][kernel_k_size + output_dim-1] - // output[kernel->columns][output_dim][output_dim] + // input[kernel->rows][kernel_k_size + output_width-1][kernel_k_size + output_width-1] + // output[kernel->columns][output_width][output_width] int k_columns = kernel->columns; int k_rows = kernel->rows; int max_move = kernel->k_size - padding; - int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride; + int input_width = output_width*stride - 2*padding + kernel->k_size - stride; float f; for (int i=0; i < k_columns; i++) { // filtre - for (int j=0; j < output_dim; j++) { // ligne de sortie - for (int k=0; k < output_dim; k++) { // colonne de sortie + for (int j=0; j < output_width; j++) { // ligne de sortie + for (int k=0; k < output_width; k++) { // colonne de sortie f = kernel->bias[i][j][k]; for (int a=0; a < k_rows; a++) { // Canal de couleur for (int b=-padding; b < max_move; b++) { // ligne du filtre for (int c=-padding; c < max_move; c++) { // colonne du filtre int x = (stride*j+b); int y = (stride*k+c); - if (convolution_not_outside(x, y, 0, input_dim)) { + if (convolution_not_outside(x, y, 0, input_width)) { f += kernel->weights[a][i][b][c]*input[a][stride*j+b][stride*k+c]; } } @@ -46,15 +46,15 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i #ifdef __CUDACC__ -__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) { +__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) { // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns - int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size) - int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_dim, k_size) + int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_width, k_size) + int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_width, k_size) int max_move = kernel->k_size - padding; - int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride; + int input_width = output_width*stride - 2*padding + kernel->k_size - stride; - if (idx >= kernel->columns || idy >= output_dim || idz >= output_dim) { + if (idx >= kernel->columns || idy >= output_width || idz >= output_width) { return; } @@ -65,7 +65,7 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa for (int c=-padding; c < max_move; c++) { int idy_2 = idy*stride+b; int idz_2 = idz*stride+c; - if (convolution_not_outside(idy_2, idz_2, 0, input_dim)) { + if (convolution_not_outside(idy_2, idz_2, 0, input_width)) { f += kernel->weights[a][idx][b][c]*input[a][idy_2][idz_2]; } } @@ -75,21 +75,21 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa output[idx][idy][idz] = f; } -void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) { +void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) { // Make computation - dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z)); + dim3 
gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z)); dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); - make_convolution_kernel<<>>(kernel, input, output, output_dim, stride, padding); + make_convolution_kernel<<>>(kernel, input, output, output_width, stride, padding); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); } #endif -void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) { +void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) { #ifndef __CUDACC__ - make_convolution_cpu(kernel, input, output, output_dim, stride, padding); + make_convolution_cpu(kernel, input, output, output_width, stride, padding); #else - make_convolution_device(kernel, input, output, output_dim, stride, padding); + make_convolution_device(kernel, input, output, output_width, stride, padding); #endif } \ No newline at end of file diff --git a/src/cnn/convolution.cu b/src/cnn/convolution.cu index 9459d76..938276f 100644 --- a/src/cnn/convolution.cu +++ b/src/cnn/convolution.cu @@ -16,27 +16,27 @@ int convolution_not_outside(int x, int y, int lower_bound, int upper_bound) { return !(x < lower_bound || y < lower_bound || x >= upper_bound || y>= upper_bound); } -void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) { +void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) { // c'est le kernel de input - // input[kernel->rows][kernel_k_size + output_dim-1][kernel_k_size + output_dim-1] - // output[kernel->columns][output_dim][output_dim] + // input[kernel->rows][kernel_k_size + output_width-1][kernel_k_size + output_width-1] + // output[kernel->columns][output_width][output_width] int k_columns = kernel->columns; int k_rows = kernel->rows; int max_move = kernel->k_size - padding; - int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride; + int input_width = output_width*stride - 2*padding + kernel->k_size - stride; float f; for (int i=0; i < k_columns; i++) { // filtre - for (int j=0; j < output_dim; j++) { // ligne de sortie - for (int k=0; k < output_dim; k++) { // colonne de sortie + for (int j=0; j < output_width; j++) { // ligne de sortie + for (int k=0; k < output_width; k++) { // colonne de sortie f = kernel->bias[i][j][k]; for (int a=0; a < k_rows; a++) { // Canal de couleur for (int b=-padding; b < max_move; b++) { // ligne du filtre for (int c=-padding; c < max_move; c++) { // colonne du filtre int x = (stride*j+b); int y = (stride*k+c); - if (convolution_not_outside(x, y, 0, input_dim)) { + if (convolution_not_outside(x, y, 0, input_width)) { f += kernel->weights[a][i][b][c]*input[a][stride*j+b][stride*k+c]; } } @@ -50,15 +50,15 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i #ifdef __CUDACC__ -__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) { +__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) { // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns - int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size) - int idz = 
threadIdx.z + blockDim.z*blockIdx.z; // < min(output_dim, k_size) + int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_width, k_size) + int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_width, k_size) int max_move = kernel->k_size - padding; - int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride; + int input_width = output_width*stride - 2*padding + kernel->k_size - stride; - if (idx >= kernel->columns || idy >= output_dim || idz >= output_dim) { + if (idx >= kernel->columns || idy >= output_width || idz >= output_width) { return; } @@ -69,7 +69,7 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa for (int c=-padding; c < max_move; c++) { int idy_2 = idy*stride+b; int idz_2 = idz*stride+c; - if (convolution_not_outside(idy_2, idz_2, 0, input_dim)) { + if (convolution_not_outside(idy_2, idz_2, 0, input_width)) { f += kernel->weights[a][idx][b][c]*input[a][idy_2][idz_2]; } } @@ -79,21 +79,21 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa output[idx][idy][idz] = f; } -void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) { +void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) { // Make computation - dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z)); + dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z)); dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); - make_convolution_kernel<<>>(kernel, input, output, output_dim, stride, padding); + make_convolution_kernel<<>>(kernel, input, output, output_width, stride, padding); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); } #endif -void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) { +void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) { #ifndef __CUDACC__ - make_convolution_cpu(kernel, input, output, output_dim, stride, padding); + make_convolution_cpu(kernel, input, output, output_width, stride, padding); #else - make_convolution_device(kernel, input, output, output_dim, stride, padding); + make_convolution_device(kernel, input, output, output_width, stride, padding); #endif } \ No newline at end of file diff --git a/src/cnn/creation.c b/src/cnn/creation.c index 023132a..5c5a1ab 100644 --- a/src/cnn/creation.c +++ b/src/cnn/creation.c @@ -9,7 +9,7 @@ #include "include/creation.h" -Network* create_network(int max_size, float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth) { +Network* create_network(int max_size, float learning_rate, int dropout, int activation, int initialisation, int input_width, int input_depth) { if (dropout < 0 || dropout > 100) { printf_error("La probabilité de dropout n'est pas respecté, elle doit être comprise entre 0 et 100\n"); } @@ -29,17 +29,17 @@ Network* create_network(int max_size, float learning_rate, int dropout, int acti } network->kernel[0]->linearisation = DOESNT_LINEARISE; network->kernel[0]->activation = activation; - network->width[0] = input_dim; + network->width[0] = input_width; network->depth[0] = input_depth; network->kernel[0]->nn = NULL; network->kernel[0]->cnn = NULL; - 
create_a_cube_input_layer(network, 0, input_depth, input_dim); - create_a_cube_input_z_layer(network, 0, input_depth, input_dim); + create_a_cube_input_layer(network, 0, input_depth, input_width); + create_a_cube_input_z_layer(network, 0, input_depth, input_width); return network; } -Network* create_network_lenet5(float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth) { - Network* network = create_network(8, learning_rate, dropout, activation, initialisation, input_dim, input_depth); +Network* create_network_lenet5(float learning_rate, int dropout, int activation, int initialisation, int input_width, int input_depth) { + Network* network = create_network(8, learning_rate, dropout, activation, initialisation, input_width, input_depth); add_convolution(network, 5, 6, 1, 0, activation); add_average_pooling(network, 2, 2, 0); add_convolution(network, 5, 16, 1, 0, activation); @@ -50,8 +50,8 @@ Network* create_network_lenet5(float learning_rate, int dropout, int activation, return network; } -Network* create_simple_one(float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth) { - Network* network = create_network(3, learning_rate, dropout, activation, initialisation, input_dim, input_depth); +Network* create_simple_one(float learning_rate, int dropout, int activation, int initialisation, int input_width, int input_depth) { + Network* network = create_network(3, learning_rate, dropout, activation, initialisation, input_width, input_depth); add_dense_linearisation(network, 80, activation); add_dense(network, 10, SOFTMAX); return network; @@ -104,8 +104,8 @@ void add_average_pooling(Network* network, int kernel_size, int stride, int padd printf_error("Impossible de rajouter une couche d'average pooling, le réseau est déjà plein\n"); return; } - int dim_input = network->width[k_pos]; - int dim_output = (2*padding + dim_input - (kernel_size - stride))/stride; + int input_width = network->width[k_pos]; + int output_width = (2*padding + input_width - (kernel_size - stride))/stride; network->kernel[k_pos]->cnn = NULL; network->kernel[k_pos]->nn = NULL; @@ -115,8 +115,8 @@ void add_average_pooling(Network* network, int kernel_size, int stride, int padd network->kernel[k_pos]->linearisation = DOESNT_LINEARISE; network->kernel[k_pos]->pooling = AVG_POOLING; - create_a_cube_input_layer(network, n, network->depth[n-1], dim_output); - create_a_cube_input_z_layer(network, n, network->depth[n-1], dim_output); + create_a_cube_input_layer(network, n, network->depth[n-1], output_width); + create_a_cube_input_z_layer(network, n, network->depth[n-1], output_width); network->size++; } @@ -127,8 +127,8 @@ void add_max_pooling(Network* network, int kernel_size, int stride, int padding) printf_error("Impossible de rajouter une couche de max pooling, le réseau est déjà plein\n"); return; } - int dim_input = network->width[k_pos]; - int dim_output = (2*padding + dim_input - (kernel_size - stride))/stride; + int input_width = network->width[k_pos]; + int output_width = (2*padding + input_width - (kernel_size - stride))/stride; network->kernel[k_pos]->cnn = NULL; network->kernel[k_pos]->nn = NULL; @@ -136,8 +136,8 @@ void add_max_pooling(Network* network, int kernel_size, int stride, int padding) network->kernel[k_pos]->linearisation = DOESNT_LINEARISE; network->kernel[k_pos]->pooling = MAX_POOLING; - create_a_cube_input_layer(network, n, network->depth[n-1], dim_output); - create_a_cube_input_z_layer(network, n, network->depth[n-1], 
dim_output); + create_a_cube_input_layer(network, n, network->depth[n-1], output_width); + create_a_cube_input_z_layer(network, n, network->depth[n-1], output_width); network->size++; } @@ -148,13 +148,13 @@ void add_convolution(Network* network, int kernel_size, int number_of_kernels, i printf_error("Impossible de rajouter une couche de convolution, le réseau est déjà plein \n"); return; } - int depth_input = network->depth[k_pos]; - int dim_input = network->width[k_pos]; + int input_depth = network->depth[k_pos]; + int input_width = network->width[k_pos]; - int dim_output = (2*padding + dim_input - (kernel_size - stride))/stride; - int depth_output = number_of_kernels; + int output_width = (2*padding + input_width - (kernel_size - stride))/stride; + int output_depth = number_of_kernels; - int bias_size = dim_output; + int bias_size = output_width; network->kernel[k_pos]->nn = NULL; network->kernel[k_pos]->stride = stride; @@ -166,23 +166,23 @@ void add_convolution(Network* network, int kernel_size, int number_of_kernels, i network->kernel[k_pos]->cnn = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn)); Kernel_cnn* cnn = network->kernel[k_pos]->cnn; cnn->k_size = kernel_size; - cnn->rows = depth_input; - cnn->columns = depth_output; + cnn->rows = input_depth; + cnn->columns = output_depth; - cnn->weights = (float****)nalloc(depth_input, sizeof(float***)); - cnn->d_weights = (float****)nalloc(depth_input, sizeof(float***)); + cnn->weights = (float****)nalloc(input_depth, sizeof(float***)); + cnn->d_weights = (float****)nalloc(input_depth, sizeof(float***)); #ifdef ADAM_CNN_WEIGHTS - cnn->s_d_weights = (float****)nalloc(depth_input, sizeof(float***)); - cnn->v_d_weights = (float****)nalloc(depth_input, sizeof(float***)); + cnn->s_d_weights = (float****)nalloc(input_depth, sizeof(float***)); + cnn->v_d_weights = (float****)nalloc(input_depth, sizeof(float***)); #endif - for (int i=0; i < depth_input; i++) { - cnn->weights[i] = (float***)nalloc(depth_output, sizeof(float**)); - cnn->d_weights[i] = (float***)nalloc(depth_output, sizeof(float**)); + for (int i=0; i < input_depth; i++) { + cnn->weights[i] = (float***)nalloc(output_depth, sizeof(float**)); + cnn->d_weights[i] = (float***)nalloc(output_depth, sizeof(float**)); #ifdef ADAM_CNN_WEIGHTS - cnn->s_d_weights[i] = (float***)nalloc(depth_output, sizeof(float**)); - cnn->v_d_weights[i] = (float***)nalloc(depth_output, sizeof(float**)); + cnn->s_d_weights[i] = (float***)nalloc(output_depth, sizeof(float**)); + cnn->v_d_weights[i] = (float***)nalloc(output_depth, sizeof(float**)); #endif - for (int j=0; j < depth_output; j++) { + for (int j=0; j < output_depth; j++) { cnn->weights[i][j] = (float**)nalloc(kernel_size, sizeof(float*)); cnn->d_weights[i][j] = (float**)nalloc(kernel_size, sizeof(float*)); #ifdef ADAM_CNN_WEIGHTS @@ -207,13 +207,13 @@ void add_convolution(Network* network, int kernel_size, int number_of_kernels, i } } - cnn->bias = (float***)nalloc(depth_output, sizeof(float**)); - cnn->d_bias = (float***)nalloc(depth_output, sizeof(float**)); + cnn->bias = (float***)nalloc(output_depth, sizeof(float**)); + cnn->d_bias = (float***)nalloc(output_depth, sizeof(float**)); #ifdef ADAM_CNN_BIAS - cnn->s_d_bias = (float***)nalloc(depth_output, sizeof(float**)); - cnn->v_d_bias = (float***)nalloc(depth_output, sizeof(float**)); + cnn->s_d_bias = (float***)nalloc(output_depth, sizeof(float**)); + cnn->v_d_bias = (float***)nalloc(output_depth, sizeof(float**)); #endif - for (int i=0; i < depth_output; i++) { + for (int i=0; i < output_depth; 
i++) { cnn->bias[i] = (float**)nalloc(bias_size, sizeof(float*)); cnn->d_bias[i] = (float**)nalloc(bias_size, sizeof(float*)); #ifdef ADAM_CNN_BIAS @@ -239,10 +239,10 @@ void add_convolution(Network* network, int kernel_size, int number_of_kernels, i int n_in = network->width[n-1]*network->width[n-1]*network->depth[n-1]; int n_out = network->width[n]*network->width[n]*network->depth[n]; - initialisation_3d_matrix(network->initialisation, cnn->bias, depth_output, dim_output, dim_output, n_in, n_out); - initialisation_4d_matrix(network->initialisation, cnn->weights, depth_input, depth_output, kernel_size, kernel_size, n_in, n_out); - create_a_cube_input_layer(network, n, depth_output, bias_size); - create_a_cube_input_z_layer(network, n, depth_output, bias_size); + initialisation_3d_matrix(network->initialisation, cnn->bias, output_depth, output_width, output_width, n_in, n_out); + initialisation_4d_matrix(network->initialisation, cnn->weights, input_depth, output_depth, kernel_size, kernel_size, n_in, n_out); + create_a_cube_input_layer(network, n, output_depth, bias_size); + create_a_cube_input_z_layer(network, n, output_depth, bias_size); network->size++; } diff --git a/src/cnn/include/backpropagation.h b/src/cnn/include/backpropagation.h index 0786e5f..83095a5 100644 --- a/src/cnn/include/backpropagation.h +++ b/src/cnn/include/backpropagation.h @@ -59,7 +59,7 @@ extern "C" /* * Transfert les informations d'erreur à travers une couche de linéarisation */ -void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, int activation); +void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, int activation); #ifdef __CUDACC__ @@ -68,6 +68,6 @@ extern "C" /* * Transfert les informations d'erreur à travers un couche de convolution */ -void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, int activation, int is_first); +void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first); #endif diff --git a/src/cnn/include/convolution.h b/src/cnn/include/convolution.h index e9d0b0e..dde60fa 100644 --- a/src/cnn/include/convolution.h +++ b/src/cnn/include/convolution.h @@ -10,21 +10,21 @@ int convolution_not_outside(int x, int y, int lower_bound, int upper_bound); /* * Effectue la convolution naïvement sur le processeur */ -void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding); +void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding); #ifdef __CUDACC__ /* * Kernel de la convolution sur carte graphique */ -__global__ void make_convolution_kernel(int k_size, int columns, int rows, float* bias, size_t pitch_bias, float**** weights, size_t pitch_weights, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_dim, int stride, int padding); +__global__ void make_convolution_kernel(int k_size, int columns, int rows, float* bias, size_t pitch_bias, float**** weights, size_t pitch_weights, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_width, int stride, int padding); /* * Effectue la convolution naïvement sur la 
carte graphique */ -void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding); +void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding); #endif /* * Détermine si la convolution peut-être faite sur la carte graphique au moment de la compilation */ -void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding); \ No newline at end of file +void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding); \ No newline at end of file diff --git a/src/cnn/include/creation.h b/src/cnn/include/creation.h index 759a015..7ec1897 100644 --- a/src/cnn/include/creation.h +++ b/src/cnn/include/creation.h @@ -7,17 +7,17 @@ /* * Créé un réseau qui peut contenir max_size couche (dont celle d'input et d'output) */ -Network* create_network(int max_size, float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth); +Network* create_network(int max_size, float learning_rate, int dropout, int activation, int initialisation, int input_width, int input_depth); /* * Renvoie un réseau suivant l'architecture LeNet5 */ -Network* create_network_lenet5(float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth); +Network* create_network_lenet5(float learning_rate, int dropout, int activation, int initialisation, int input_width, int input_depth); /* * Renvoie un réseau sans convolution, similaire à celui utilisé dans src/dense */ -Network* create_simple_one(float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth); +Network* create_simple_one(float learning_rate, int dropout, int activation, int initialisation, int input_width, int input_depth); /* * Créé et alloue de la mémoire à une couche de type input cube @@ -49,7 +49,7 @@ void add_max_pooling(Network* network, int kernel_size, int stride, int padding) /* * Ajoute au réseau une couche de convolution avec la taille de noyau (kernel_size), * le remplissage (padding) et le décalge (stride) choisis. 
Le choix de la profondeur de -* la couche suivante se fait avec number_of_kernels (= depth_output) +* la couche suivante se fait avec number_of_kernels (= output_depth) * Puis initialise les poids et les biais construits */ void add_convolution(Network* network, int kernel_size, int number_of_kernels, int stride, int padding, int activation); diff --git a/src/cnn/include/make.h b/src/cnn/include/make.h index 034fc43..2919029 100644 --- a/src/cnn/include/make.h +++ b/src/cnn/include/make.h @@ -14,12 +14,12 @@ int pooling_not_outside(int x, int y, int lower_bound, int upper_bound); /* * Effectue la propagation d'une convolution avec stride et padding choisis sur le processeur */ -void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding); +void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding); /* * Effectue la propagation d'une convolution avec stride et padding choisis sur le CPU ou GPU */ -void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding); +void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding); #ifdef __CUDACC__ extern "C" @@ -27,7 +27,7 @@ extern "C" /* * Effectue propagation d'average pooling avec stride et padding choisis */ -void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride, int padding); +void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding); #ifdef __CUDACC__ extern "C" @@ -35,7 +35,7 @@ extern "C" /* * Effectue propagation de max pooling avec stride et padding choisis */ -void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride, int padding); +void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding); #ifdef __CUDACC__ extern "C" @@ -51,6 +51,6 @@ extern "C" /* * Effectue la propagation d'une couche dense qui passe d'une matrice à un vecteur */ -void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output); +void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int input_depth, int input_width, int size_output); #endif \ No newline at end of file diff --git a/src/cnn/include/neuron_io.h b/src/cnn/include/neuron_io.h index 64206f5..015055a 100644 --- a/src/cnn/include/neuron_io.h +++ b/src/cnn/include/neuron_io.h @@ -33,5 +33,5 @@ Network* read_network(char* filename); /* * Lit une kernel dans le fichier spécifié par le pointeur ptr */ -Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr); +Kernel* read_kernel(int type_couche, int output_width, FILE* ptr); #endif \ No newline at end of file diff --git a/src/cnn/include/print.h b/src/cnn/include/print.h index 6a65aa2..f053497 100644 --- a/src/cnn/include/print.h +++ b/src/cnn/include/print.h @@ -6,7 +6,7 @@ /* * Affiche le kernel d'une couche de convolution */ -void print_kernel_cnn(Kernel_cnn* k, int depth_input, int dim_input, int depth_output, int dim_output); +void print_kernel_cnn(Kernel_cnn* k, int input_depth, int input_width, int output_depth, int output_width); /* * Affiche une couche de pooling diff --git a/src/cnn/include/struct.h b/src/cnn/include/struct.h index 6b8ab63..9aa098a 100644 --- a/src/cnn/include/struct.h 
+++ b/src/cnn/include/struct.h @@ -12,15 +12,15 @@ typedef struct Kernel_cnn { // Noyau ayant une couche matricielle en sortie - int k_size; // k_size = dim_input - dim_output + 1 + int k_size; // k_size = input_width - output_width + 1 int rows; // Depth de l'input int columns; // Depth de l'output - float*** bias; // bias[columns][dim_output][dim_output] <=> bias[depth output][dim output][dim output] - float*** d_bias; // d_bias[columns][dim_output][dim_output] + float*** bias; // bias[columns][output_width][output_width] <=> bias[depth output][dim output][dim output] + float*** d_bias; // d_bias[columns][output_width][output_width] #ifdef ADAM_CNN_BIAS - float*** s_d_bias; // s_d_bias[columns][dim_output][dim_output] - float*** v_d_bias; // v_d_bias[columns][dim_output][dim_output] + float*** s_d_bias; // s_d_bias[columns][output_width][output_width] + float*** v_d_bias; // v_d_bias[columns][output_width][output_width] #endif float**** weights; // weights[rows][columns][k_size][k_size] <=> weights[depth input][depth output][size kernel][size kernel] diff --git a/src/cnn/make.c b/src/cnn/make.c index 423eb99..57c973a 100644 --- a/src/cnn/make.c +++ b/src/cnn/make.c @@ -24,7 +24,7 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width int max_move = size - padding; - int input_dim = output_width*stride - 2*padding + size - stride; + int input_width = output_width*stride - 2*padding + size - stride; if (idx >= output_depth || idy >= output_width || idz >= output_width) { return; @@ -37,7 +37,7 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int for (int b=-padding; b < max_move; b++) { int idy_2 = stride*idy +a; int idz_2 = stride*idz +b; - if (pooling_not_outside(idy_2, idz_2, 0, input_dim)) { + if (pooling_not_outside(idy_2, idz_2, 0, input_width)) { sum += input[idx][idy_2][idz_2]; nb_elements++; } @@ -61,7 +61,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out // input[output_depth][output_width+size-1][output_width+size-1] // output[output_depth][output_width][output_width] int max_move = size - padding; - int input_dim = output_width*stride - 2*padding + size - stride; + int input_width = output_width*stride - 2*padding + size - stride; for (int i=0; i < output_depth; i++) { for (int j=0; j < output_width; j++) { @@ -72,7 +72,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out for (int b=-padding; b < max_move; b++) { int j_2 = stride*j +a; int k_2 = stride*k +b; - if (pooling_not_outside(j_2, k_2, 0, input_dim)) { + if (pooling_not_outside(j_2, k_2, 0, input_width)) { sum += input[i][j_2][k_2]; nb_elements++; } @@ -108,7 +108,7 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width - int input_dim = output_width*stride - 2*padding + size - stride; + int input_width = output_width*stride - 2*padding + size - stride; if (idx >= output_depth || idy >= output_width || idz >= output_width) { return; @@ -122,7 +122,7 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz for (int b=-padding; b < max_move; b++) { int idy_2 = stride*idy +a; int idz_2 = stride*idz +b; - if 
(pooling_not_outside(idy_2, idz_2, 0, input_dim)) {
+            if (pooling_not_outside(idy_2, idz_2, 0, input_width)) {
                 temp = input[idx][idy_2][idz_2];
                 m = m > temp ? m : temp; // max(m, temp)
             }
@@ -146,7 +146,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
     // input[output_depth][output_width+size-1][output_width+size-1]
     // output[output_depth][output_width][output_width]
     int max_move = size - padding;
-    int input_dim = output_width*stride - 2*padding + size - stride;
+    int input_width = output_width*stride - 2*padding + size - stride;
     float m;
     for (int i=0; i < output_depth; i++) {
         for (int j=0; j < output_width; j++) {
@@ -156,7 +156,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
                 for (int b=-padding; b < max_move; b++) {
                     int j_2 = stride*j +a;
                     int k_2 = stride*k +b;
-                    if (pooling_not_outside(j_2, k_2, 0, input_dim)) {
+                    if (pooling_not_outside(j_2, k_2, 0, input_width)) {
                         m = fmaxf(m, input[i][j_2][k_2]);
                     }
                 }
@@ -248,7 +248,7 @@ void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input,
 * Dense linearized
 */
 #ifdef __CUDACC__
-__global__ void make_dense_linearized_kernel(float** weights, float* bias, float*** input, float* output, int depth_input, int dim_input, int size_output) {
+__global__ void make_dense_linearized_kernel(float** weights, float* bias, float*** input, float* output, int input_depth, int input_width, int size_output) {
     // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
     int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output
@@ -257,38 +257,38 @@ __global__ void make_dense_linearized_kernel(float** weights, float* bias, float
     }
     float f = bias[idx];
-    for (int i=0; i < depth_input; i++) {
-        for (int j=0; j < dim_input; j++) {
-            for (int k=0; k < dim_input; k++) {
-                f += input[i][j][k]*weights[k + j*dim_input + i*depth_input][idx];
+    for (int i=0; i < input_depth; i++) {
+        for (int j=0; j < input_width; j++) {
+            for (int k=0; k < input_width; k++) {
+                f += input[i][j][k]*weights[k + j*input_width + i*input_depth][idx];
             }
         }
     }
     output[idx] = f;
 }
-void make_dense_linearized_device(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) {
+void make_dense_linearized_device(Kernel_nn* kernel, float*** input, float* output, int input_depth, int input_width, int size_output) {
     // Make computation
     dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1);
     dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z);
-    make_dense_linearized_kernel<<<gridSize, blockSize>>>(kernel->weights, kernel->bias, input, output, depth_input, dim_input, size_output);
+    make_dense_linearized_kernel<<<gridSize, blockSize>>>(kernel->weights, kernel->bias, input, output, input_depth, input_width, size_output);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }
 #endif
-void make_dense_linearized_cpu(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) {
-    // input[depth_input][dim_input][dim_input]
+void make_dense_linearized_cpu(Kernel_nn* kernel, float*** input, float* output, int input_depth, int input_width, int size_output) {
+    // input[input_depth][input_width][input_width]
     // output[size_output]
     float f;
     for (int l=0; l < size_output; l++) {
         f = kernel->bias[l];
-        for (int i=0; i < depth_input; i++) {
-            for (int j=0; j < dim_input; j++) {
-                for (int k=0; k < dim_input; k++) {
-                    f += input[i][j][k]*kernel->weights[k + j*dim_input + i*depth_input][l];
+        for (int i=0; i < input_depth; i++) {
+            for
(int j=0; j < input_width; j++) { + for (int k=0; k < input_width; k++) { + f += input[i][j][k]*kernel->weights[k + j*input_width + i*input_depth][l]; } } } @@ -299,10 +299,10 @@ void make_dense_linearized_cpu(Kernel_nn* kernel, float*** input, float* output, #ifdef __CUDACC__ extern "C" #endif -void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { +void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int input_depth, int input_width, int size_output) { #ifndef __CUDACC__ - make_dense_linearized_cpu(kernel, input, output, depth_input, dim_input, size_output); + make_dense_linearized_cpu(kernel, input, output, input_depth, input_width, size_output); #else - make_dense_linearized_device(kernel, input, output, depth_input, dim_input, size_output); + make_dense_linearized_device(kernel, input, output, input_depth, input_width, size_output); #endif } diff --git a/src/cnn/make.cu b/src/cnn/make.cu index 423eb99..57c973a 100644 --- a/src/cnn/make.cu +++ b/src/cnn/make.cu @@ -24,7 +24,7 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width int max_move = size - padding; - int input_dim = output_width*stride - 2*padding + size - stride; + int input_width = output_width*stride - 2*padding + size - stride; if (idx >= output_depth || idy >= output_width || idz >= output_width) { return; @@ -37,7 +37,7 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int for (int b=-padding; b < max_move; b++) { int idy_2 = stride*idy +a; int idz_2 = stride*idz +b; - if (pooling_not_outside(idy_2, idz_2, 0, input_dim)) { + if (pooling_not_outside(idy_2, idz_2, 0, input_width)) { sum += input[idx][idy_2][idz_2]; nb_elements++; } @@ -61,7 +61,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out // input[output_depth][output_width+size-1][output_width+size-1] // output[output_depth][output_width][output_width] int max_move = size - padding; - int input_dim = output_width*stride - 2*padding + size - stride; + int input_width = output_width*stride - 2*padding + size - stride; for (int i=0; i < output_depth; i++) { for (int j=0; j < output_width; j++) { @@ -72,7 +72,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out for (int b=-padding; b < max_move; b++) { int j_2 = stride*j +a; int k_2 = stride*k +b; - if (pooling_not_outside(j_2, k_2, 0, input_dim)) { + if (pooling_not_outside(j_2, k_2, 0, input_width)) { sum += input[i][j_2][k_2]; nb_elements++; } @@ -108,7 +108,7 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width - int input_dim = output_width*stride - 2*padding + size - stride; + int input_width = output_width*stride - 2*padding + size - stride; if (idx >= output_depth || idy >= output_width || idz >= output_width) { return; @@ -122,7 +122,7 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz for (int b=-padding; b < max_move; b++) { int idy_2 = stride*idy +a; int idz_2 = stride*idz +b; - if (pooling_not_outside(idy_2, idz_2, 0, input_dim)) { + if (pooling_not_outside(idy_2, idz_2, 0, input_width)) { temp = 
input[idx][idy_2][idz_2];
                 m = m > temp ? m : temp; // max(m, temp)
             }
@@ -146,7 +146,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
     // input[output_depth][output_width+size-1][output_width+size-1]
     // output[output_depth][output_width][output_width]
     int max_move = size - padding;
-    int input_dim = output_width*stride - 2*padding + size - stride;
+    int input_width = output_width*stride - 2*padding + size - stride;
     float m;
     for (int i=0; i < output_depth; i++) {
         for (int j=0; j < output_width; j++) {
@@ -156,7 +156,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
                 for (int b=-padding; b < max_move; b++) {
                     int j_2 = stride*j +a;
                     int k_2 = stride*k +b;
-                    if (pooling_not_outside(j_2, k_2, 0, input_dim)) {
+                    if (pooling_not_outside(j_2, k_2, 0, input_width)) {
                         m = fmaxf(m, input[i][j_2][k_2]);
                     }
                 }
@@ -248,7 +248,7 @@ void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input,
 * Dense linearized
 */
 #ifdef __CUDACC__
-__global__ void make_dense_linearized_kernel(float** weights, float* bias, float*** input, float* output, int depth_input, int dim_input, int size_output) {
+__global__ void make_dense_linearized_kernel(float** weights, float* bias, float*** input, float* output, int input_depth, int input_width, int size_output) {
     // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
     int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output
@@ -257,38 +257,38 @@ __global__ void make_dense_linearized_kernel(float** weights, float* bias, float
     }
     float f = bias[idx];
-    for (int i=0; i < depth_input; i++) {
-        for (int j=0; j < dim_input; j++) {
-            for (int k=0; k < dim_input; k++) {
-                f += input[i][j][k]*weights[k + j*dim_input + i*depth_input][idx];
+    for (int i=0; i < input_depth; i++) {
+        for (int j=0; j < input_width; j++) {
+            for (int k=0; k < input_width; k++) {
+                f += input[i][j][k]*weights[k + j*input_width + i*input_depth][idx];
             }
         }
     }
     output[idx] = f;
 }
-void make_dense_linearized_device(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) {
+void make_dense_linearized_device(Kernel_nn* kernel, float*** input, float* output, int input_depth, int input_width, int size_output) {
     // Make computation
     dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1);
     dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z);
-    make_dense_linearized_kernel<<<gridSize, blockSize>>>(kernel->weights, kernel->bias, input, output, depth_input, dim_input, size_output);
+    make_dense_linearized_kernel<<<gridSize, blockSize>>>(kernel->weights, kernel->bias, input, output, input_depth, input_width, size_output);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }
 #endif
-void make_dense_linearized_cpu(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) {
-    // input[depth_input][dim_input][dim_input]
+void make_dense_linearized_cpu(Kernel_nn* kernel, float*** input, float* output, int input_depth, int input_width, int size_output) {
+    // input[input_depth][input_width][input_width]
     // output[size_output]
     float f;
     for (int l=0; l < size_output; l++) {
         f = kernel->bias[l];
-        for (int i=0; i < depth_input; i++) {
-            for (int j=0; j < dim_input; j++) {
-                for (int k=0; k < dim_input; k++) {
-                    f += input[i][j][k]*kernel->weights[k + j*dim_input + i*depth_input][l];
+        for (int i=0; i < input_depth; i++) {
+            for (int j=0; j < input_width; j++) {
+                for (int k=0; k < input_width; k++) {
+                    f += input[i][j][k]*kernel->weights[k +
j*input_width + i*input_depth][l]; } } } @@ -299,10 +299,10 @@ void make_dense_linearized_cpu(Kernel_nn* kernel, float*** input, float* output, #ifdef __CUDACC__ extern "C" #endif -void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { +void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int input_depth, int input_width, int size_output) { #ifndef __CUDACC__ - make_dense_linearized_cpu(kernel, input, output, depth_input, dim_input, size_output); + make_dense_linearized_cpu(kernel, input, output, input_depth, input_width, size_output); #else - make_dense_linearized_device(kernel, input, output, depth_input, dim_input, size_output); + make_dense_linearized_device(kernel, input, output, input_depth, input_width, size_output); #endif } diff --git a/src/cnn/neuron_io.c b/src/cnn/neuron_io.c index 06c935e..51d5ef1 100644 --- a/src/cnn/neuron_io.c +++ b/src/cnn/neuron_io.c @@ -73,7 +73,7 @@ void write_couche(Network* network, int indice_couche, int type_couche, FILE* pt int indice_buffer = 0; if (type_couche == 0) { // Cas du CNN Kernel_cnn* cnn = kernel->cnn; - int output_dim = network->width[indice_couche+1]; + int output_width = network->width[indice_couche+1]; // Écriture du pré-corps uint32_t pre_buffer[7]; @@ -90,9 +90,9 @@ void write_couche(Network* network, int indice_couche, int type_couche, FILE* pt // We need to split in small buffers to keep some free memory in the computer for (int i=0; i < cnn->columns; i++) { indice_buffer = 0; - float buffer[output_dim*output_dim]; - for (int j=0; j < output_dim; j++) { - for (int k=0; k < output_dim; k++) { + float buffer[output_width*output_width]; + for (int j=0; j < output_width; j++) { + for (int k=0; k < output_width; k++) { bufferAdd(cnn->bias[i][j][k]); } } @@ -234,7 +234,7 @@ Network* read_network(char* filename) { return network; } -Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) { +Kernel* read_kernel(int type_couche, int output_width, FILE* ptr) { Kernel* kernel = (Kernel*)nalloc(1, sizeof(Kernel)); if (type_couche == CNN) { // Cas du CNN // Lecture du "Pré-corps" @@ -262,20 +262,20 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) { cnn->v_d_bias = (float***)nalloc(cnn->columns, sizeof(float**)); #endif for (int i=0; i < cnn->columns; i++) { - cnn->bias[i] = (float**)nalloc(output_dim, sizeof(float*)); - cnn->d_bias[i] = (float**)nalloc(output_dim, sizeof(float*)); + cnn->bias[i] = (float**)nalloc(output_width, sizeof(float*)); + cnn->d_bias[i] = (float**)nalloc(output_width, sizeof(float*)); #ifdef ADAM_CNN_BIAS - cnn->s_d_bias[i] = (float**)nalloc(output_dim, sizeof(float*)); - cnn->v_d_bias[i] = (float**)nalloc(output_dim, sizeof(float*)); + cnn->s_d_bias[i] = (float**)nalloc(output_width, sizeof(float*)); + cnn->v_d_bias[i] = (float**)nalloc(output_width, sizeof(float*)); #endif - for (int j=0; j < output_dim; j++) { - cnn->bias[i][j] = (float*)nalloc(output_dim, sizeof(float)); - cnn->d_bias[i][j] = (float*)nalloc(output_dim, sizeof(float)); + for (int j=0; j < output_width; j++) { + cnn->bias[i][j] = (float*)nalloc(output_width, sizeof(float)); + cnn->d_bias[i][j] = (float*)nalloc(output_width, sizeof(float)); #ifdef ADAM_CNN_BIAS - cnn->s_d_bias[i][j] = (float*)nalloc(output_dim, sizeof(float)); - cnn->v_d_bias[i][j] = (float*)nalloc(output_dim, sizeof(float)); + cnn->s_d_bias[i][j] = (float*)nalloc(output_width, sizeof(float)); + cnn->v_d_bias[i][j] = (float*)nalloc(output_width, sizeof(float)); 
 #endif
-            for (int k=0; k < output_dim; k++) {
+            for (int k=0; k < output_width; k++) {
                 (void) !fread(&tmp, sizeof(tmp), 1, ptr);
                 cnn->bias[i][j][k] = tmp;
                 cnn->d_bias[i][j][k] = 0.;
diff --git a/src/cnn/print.c b/src/cnn/print.c
index 4005218..a3f33bc 100644
--- a/src/cnn/print.c
+++ b/src/cnn/print.c
@@ -11,13 +11,13 @@
 #define purple printf("\033[0;35m")
 #define reset_color printf("\033[0m")

-void print_kernel_cnn(Kernel_cnn* ker, int depth_input, int dim_input, int depth_output, int dim_output) {
-    int k_size = dim_input - dim_output + 1;
+void print_kernel_cnn(Kernel_cnn* ker, int input_depth, int input_width, int output_depth, int output_width) {
+    int k_size = input_width - output_width + 1;
     // print bias
     green;
-    for (int i=0; i<depth_output; i++) {
-        for (int j=0; j<dim_output; j++) {
-            for (int k=0; k<dim_output; k++) {
+    for (int i=0; i<output_depth; i++) {
+        for (int j=0; j<output_width; j++) {
+            for (int k=0; k<output_width; k++) {
                 printf("%f", ker->bias[i][j][k]);
             }
             print_space;
@@ -29,9 +29,9 @@

     //print weights
     red;
-    for (int i=0; i<depth_input; i++) {
-        for (int j=0; j<depth_output; j++) {
+    for (int i=0; i<input_depth; i++) {
+        for (int j=0; j<output_depth; j++) {
             for (int k=0; k<k_size; k++) {
                 for (int l=0; l<k_size; l++) {
                     printf("%f", ker->weights[i][j][k][l]);
diff --git a/src/cnn/train.c b/src/cnn/train.c
index 0197659..9f58b70 100644
--- a/src/cnn/train.c
+++ b/src/cnn/train.c
@@ -153,7 +153,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di
     //* Chargement du dataset
-    int input_dim = -1;
+    int input_width = -1;
     int input_depth = -1;
     int nb_images_total; // Images au total
@@ -172,11 +172,11 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di
         images = read_mnist_images(images_file);
         labels = read_mnist_labels(labels_file);
-        input_dim = 32;
+        input_width = 32;
         input_depth = 1;
     } else { // Type JPG
         dataset = loadJpegDataset(data_dir);
-        input_dim = dataset->height + 4; // image_size + padding
+        input_width = dataset->height + 4; // image_size + padding
         input_depth = dataset->numComponents;
         nb_images_total = dataset->numImages;
@@ -185,8 +185,8 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di
     //* Création du réseau
     Network* network;
     if (!recover) {
-        network = create_network_lenet5(LEARNING_RATE, 0, RELU, NORMALIZED_XAVIER, input_dim, input_depth);
-        //network = create_simple_one(LEARNING_RATE, 0, RELU, GLOROT, input_dim, input_depth);
+        network = create_network_lenet5(LEARNING_RATE, 0, RELU, NORMALIZED_XAVIER, input_width, input_depth);
+        //network = create_simple_one(LEARNING_RATE, 0, RELU, GLOROT, input_width, input_depth);
     } else {
         network = read_network(recover);
         network->learning_rate = LEARNING_RATE;
diff --git a/src/cnn/utils.c b/src/cnn/utils.c
index 9bb864e..6b65c20 100644
--- a/src/cnn/utils.c
+++ b/src/cnn/utils.c
@@ -33,7 +33,7 @@ void knuth_shuffle(int* tab, int n) {
 }
 bool equals_networks(Network* network1, Network* network2) {
-    int output_dim;
+    int output_width;
     checkEquals(size, "size", -1);
     checkEquals(initialisation, "initialisation", -1);
     checkEquals(dropout, "dropout", -1);
@@ -70,13 +70,13 @@ bool equals_networks(Network* network1, Network* network2) {
         }
     } else { // Type CNN
-        output_dim = network1->width[i+1];
+        output_width = network1->width[i+1];
         checkEquals(kernel[i]->cnn->k_size, "kernel[i]->k_size", i);
         checkEquals(kernel[i]->cnn->rows, "kernel[i]->rows", i);
         checkEquals(kernel[i]->cnn->columns, "kernel[i]->columns", i);
         for (int j=0; j < network1->kernel[i]->cnn->columns; j++) {
-            for (int k=0; k < output_dim; k++) {
-                for (int l=0; l < output_dim; l++) {
+            for (int k=0; k < output_width; k++) {
+                for (int l=0; l < output_width; l++) {
                     checkEquals(kernel[i]->cnn->bias[j][k][l], "kernel[i]->cnn->bias[j][k][l]", l);
                 }
             }
@@ -108,7 +108,7 @@ Network* copy_network(Network* network) {
     int rows;
     int
k_size; int columns; - int output_dim; + int output_width; copyVar(dropout); copyVar(learning_rate); @@ -200,7 +200,7 @@ Network* copy_network(Network* network) { rows = network->kernel[i]->cnn->rows; k_size = network->kernel[i]->cnn->k_size; columns = network->kernel[i]->cnn->columns; - output_dim = network->width[i+1]; + output_width = network->width[i+1]; network_cp->kernel[i]->nn = NULL; @@ -217,20 +217,20 @@ Network* copy_network(Network* network) { network_cp->kernel[i]->cnn->v_d_bias = (float***)nalloc(columns, sizeof(float**)); #endif for (int j=0; j < columns; j++) { - network_cp->kernel[i]->cnn->bias[j] = (float**)nalloc(output_dim, sizeof(float*)); - network_cp->kernel[i]->cnn->d_bias[j] = (float**)nalloc(output_dim, sizeof(float*)); + network_cp->kernel[i]->cnn->bias[j] = (float**)nalloc(output_width, sizeof(float*)); + network_cp->kernel[i]->cnn->d_bias[j] = (float**)nalloc(output_width, sizeof(float*)); #ifdef ADAM_CNN_BIAS - network_cp->kernel[i]->cnn->s_d_bias[j] = (float**)nalloc(output_dim, sizeof(float*)); - network_cp->kernel[i]->cnn->v_d_bias[j] = (float**)nalloc(output_dim, sizeof(float*)); + network_cp->kernel[i]->cnn->s_d_bias[j] = (float**)nalloc(output_width, sizeof(float*)); + network_cp->kernel[i]->cnn->v_d_bias[j] = (float**)nalloc(output_width, sizeof(float*)); #endif - for (int k=0; k < output_dim; k++) { - network_cp->kernel[i]->cnn->bias[j][k] = (float*)nalloc(output_dim, sizeof(float)); - network_cp->kernel[i]->cnn->d_bias[j][k] = (float*)nalloc(output_dim, sizeof(float)); + for (int k=0; k < output_width; k++) { + network_cp->kernel[i]->cnn->bias[j][k] = (float*)nalloc(output_width, sizeof(float)); + network_cp->kernel[i]->cnn->d_bias[j][k] = (float*)nalloc(output_width, sizeof(float)); #ifdef ADAM_CNN_BIAS - network_cp->kernel[i]->cnn->s_d_bias[j][k] = (float*)nalloc(output_dim, sizeof(float)); - network_cp->kernel[i]->cnn->v_d_bias[j][k] = (float*)nalloc(output_dim, sizeof(float)); + network_cp->kernel[i]->cnn->s_d_bias[j][k] = (float*)nalloc(output_width, sizeof(float)); + network_cp->kernel[i]->cnn->v_d_bias[j][k] = (float*)nalloc(output_width, sizeof(float)); #endif - for (int l=0; l < output_dim; l++) { + for (int l=0; l < output_width; l++) { copyVar(kernel[i]->cnn->bias[j][k][l]); network_cp->kernel[i]->cnn->d_bias[j][k][l] = 0.; #ifdef ADAM_CNN_BIAS @@ -324,7 +324,7 @@ void copy_network_parameters(Network* network_src, Network* network_dest) { int rows; int k_size; int columns; - int output_dim; + int output_width; copyVarParams(learning_rate); @@ -348,11 +348,11 @@ void copy_network_parameters(Network* network_src, Network* network_dest) { rows = network_src->kernel[i]->cnn->rows; k_size = network_src->kernel[i]->cnn->k_size; columns = network_src->kernel[i]->cnn->columns; - output_dim = network_src->width[i+1]; + output_width = network_src->width[i+1]; for (int j=0; j < columns; j++) { - for (int k=0; k < output_dim; k++) { - for (int l=0; l < output_dim; l++) { + for (int k=0; k < output_width; k++) { + for (int l=0; l < output_width; l++) { copyVarParams(kernel[i]->cnn->bias[j][k][l]); } } @@ -385,7 +385,7 @@ int count_null_weights(Network* network) { int rows; int k_size; int columns; - int output_dim; + int output_width; for (int i=0; i < size-1; i++) { if (!network->kernel[i]->cnn && network->kernel[i]->nn) { // Cas du NN @@ -407,11 +407,11 @@ int count_null_weights(Network* network) { rows = network->kernel[i]->cnn->rows; k_size = network->kernel[i]->cnn->k_size; columns = network->kernel[i]->cnn->columns; - output_dim = 
network->width[i+1]; + output_width = network->width[i+1]; for (int j=0; j < columns; j++) { - for (int k=0; k < output_dim; k++) { - for (int l=0; l < output_dim; l++) { + for (int k=0; k < output_width; k++) { + for (int l=0; l < output_width; l++) { null_bias += fabs(network->kernel[i]->cnn->bias[j][k][l]) <= epsilon; } } diff --git a/src/scripts/benchmark_mul.py b/src/scripts/benchmark_mul.py index 2e0d1a4..8d5f7b9 100644 --- a/src/scripts/benchmark_mul.py +++ b/src/scripts/benchmark_mul.py @@ -54,12 +54,12 @@ def generate_data_mul(): def generate_data_conv(): values = [] - output_dim = 40 + output_width = 40 rows = 40 columns = 40 for i in range(10): - values.append(avg([conv_matrix((i+1)*100, output_dim, rows, columns) for j in range(10)])) - print(f"Added ({(i+1)*100}, output_dim, rows, columns)") + values.append(avg([conv_matrix((i+1)*100, output_width, rows, columns) for j in range(10)])) + print(f"Added ({(i+1)*100}, output_width, rows, columns)") with open("result_conv.json", "weights") as file: json.dump(values, file, indent=4) diff --git a/src/scripts/convolution_benchmark.cu b/src/scripts/convolution_benchmark.cu index 78b8ae3..70d397a 100644 --- a/src/scripts/convolution_benchmark.cu +++ b/src/scripts/convolution_benchmark.cu @@ -102,9 +102,9 @@ bool check_matrices_equality(float*** m1, float*** m2, int n, int p, int q, int return true; } -void run_convolution_test(int input_dim, int output_dim, int rows, int columns) { - assert(input_dim >= output_dim); - int k_size = input_dim - output_dim +1; +void run_convolution_test(int input_width, int output_width, int rows, int columns) { + assert(input_width >= output_width); + int k_size = input_width - output_width +1; // Génération des données aléatoires Kernel_cnn* kernel = (Kernel_cnn*)malloc(sizeof(Kernel_cnn)); @@ -145,11 +145,11 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns) #endif } - float*** input = create_matrix(kernel->rows, input_dim, input_dim, 5.0f); - float*** output_cpu = create_empty_matrix(kernel->columns, output_dim, output_dim); - float*** output_gpu = create_empty_matrix(kernel->columns, output_dim, output_dim); + float*** input = create_matrix(kernel->rows, input_width, input_width, 5.0f); + float*** output_cpu = create_empty_matrix(kernel->columns, output_width, output_width); + float*** output_gpu = create_empty_matrix(kernel->columns, output_width, output_width); - //printf("(%d, %d, %d, %d) Data generation complete\n", rows, columns, input_dim, output_dim); + //printf("(%d, %d, %d, %d) Data generation complete\n", rows, columns, input_width, output_width); // Lancement des calculs @@ -157,7 +157,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns) double cpu_time_used, gpu_time_used; start = clock(); - make_convolution_device(kernel, input, output_gpu, output_dim, 1); + make_convolution_device(kernel, input, output_gpu, output_width, 1); end = clock(); gpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; @@ -165,15 +165,15 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns) start = clock(); - make_convolution_cpu(kernel, input, output_cpu, output_dim, 1); + make_convolution_cpu(kernel, input, output_cpu, output_width, 1); end = clock(); cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; printf("CPU: %lf\n", cpu_time_used); // Vérification de l'égalité des matrices - //printf("(%d, %d, %d, %d) Checking equality.\n", rows, columns, input_dim, output_dim); - if 
(!check_matrices_equality(output_gpu, output_cpu, kernel->columns, output_dim, output_dim, kernel->k_size)) {// TODO: change acceptation + //printf("(%d, %d, %d, %d) Checking equality.\n", rows, columns, input_width, output_width); + if (!check_matrices_equality(output_gpu, output_cpu, kernel->columns, output_width, output_width, kernel->k_size)) {// TODO: change acceptation //exit(1); } //printf(GREEN "OK\n" RESET); @@ -200,9 +200,9 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns) free(kernel->v_d_weights); #endif - free_matrix(input, kernel->rows, input_dim); - free_matrix(output_cpu, kernel->columns, output_dim); - free_matrix(output_gpu, kernel->columns, output_dim); + free_matrix(input, kernel->rows, input_width); + free_matrix(output_cpu, kernel->columns, output_width); + free_matrix(output_gpu, kernel->columns, output_width); } diff --git a/test/cnn_convolution.cu b/test/cnn_convolution.cu index 7738d34..9ca2b25 100644 --- a/test/cnn_convolution.cu +++ b/test/cnn_convolution.cu @@ -93,9 +93,9 @@ bool check_matrices_equality(float*** m1, float*** m2, int n, int p, int q, int return true; } -void run_convolution_test(int input_dim, int output_dim, int rows, int columns) { - assert(input_dim >= output_dim); - int k_size = input_dim - output_dim +1; +void run_convolution_test(int input_width, int output_width, int rows, int columns) { + assert(input_width >= output_width); + int k_size = input_width - output_width +1; // Génération des données aléatoires Kernel_cnn* kernel = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn)); @@ -104,12 +104,12 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns) kernel->rows = rows; kernel->columns = columns; - // bias[kernel->columns][dim_output][dim_output] - kernel->bias = create_matrix(kernel->columns, output_dim, output_dim, 15.0f); - kernel->d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f); + // bias[kernel->columns][output_width][output_width] + kernel->bias = create_matrix(kernel->columns, output_width, output_width, 15.0f); + kernel->d_bias = create_matrix(kernel->columns, output_width, output_width, 1.5f); #ifdef ADAM_CNN_BIAS - kernel->s_d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f); - kernel->v_d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f); + kernel->s_d_bias = create_matrix(kernel->columns, output_width, output_width, 1.5f); + kernel->v_d_bias = create_matrix(kernel->columns, output_width, output_width, 1.5f); #endif // weights[rows][columns][k_size][k_size] @@ -128,11 +128,11 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns) #endif } - float*** input = create_matrix(kernel->rows, input_dim, input_dim, 5.0f); - float*** output_cpu = create_empty_matrix(kernel->columns, output_dim, output_dim); - float*** output_gpu = create_empty_matrix(kernel->columns, output_dim, output_dim); + float*** input = create_matrix(kernel->rows, input_width, input_width, 5.0f); + float*** output_cpu = create_empty_matrix(kernel->columns, output_width, output_width); + float*** output_gpu = create_empty_matrix(kernel->columns, output_width, output_width); - printf("(%d, %d, %d, %d) Data generation complete\n", rows, columns, input_dim, output_dim); + printf("(%d, %d, %d, %d) Data generation complete\n", rows, columns, input_width, output_width); // Lancement des calculs @@ -140,33 +140,33 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns) double cpu_time_used, gpu_time_used; 
start_time = omp_get_wtime(); - make_convolution_device(kernel, input, output_gpu, output_dim, 1); + make_convolution_device(kernel, input, output_gpu, output_width, 1); end_time = omp_get_wtime(); gpu_time_used = end_time - start_time; - printf("(%d, %d, %d, %d) Time used for GPU: %lf seconds\n", rows, columns, input_dim, output_dim, gpu_time_used); + printf("(%d, %d, %d, %d) Time used for GPU: %lf seconds\n", rows, columns, input_width, output_width, gpu_time_used); start_time = omp_get_wtime(); - make_convolution_cpu(kernel, input, output_cpu, output_dim, 1); + make_convolution_cpu(kernel, input, output_cpu, output_width, 1); end_time = omp_get_wtime(); cpu_time_used = end_time - start_time; - printf("(%d, %d, %d, %d) Time used for CPU: %lf seconds\n", rows, columns, input_dim, output_dim, cpu_time_used); + printf("(%d, %d, %d, %d) Time used for CPU: %lf seconds\n", rows, columns, input_width, output_width, cpu_time_used); // Vérification de l'égalité des matrices - printf("(%d, %d, %d, %d) Checking equality.\n", rows, columns, input_dim, output_dim); - if (!check_matrices_equality(output_gpu, output_cpu, kernel->columns, output_dim, output_dim, kernel->k_size)) {// TODO: change acceptation + printf("(%d, %d, %d, %d) Checking equality.\n", rows, columns, input_width, output_width); + if (!check_matrices_equality(output_gpu, output_cpu, kernel->columns, output_width, output_width, kernel->k_size)) {// TODO: change acceptation exit(1); } printf(GREEN "OK\n" RESET); - free_matrix(kernel->bias, kernel->columns, output_dim); - free_matrix(kernel->d_bias, kernel->columns, output_dim); + free_matrix(kernel->bias, kernel->columns, output_width); + free_matrix(kernel->d_bias, kernel->columns, output_width); #ifdef ADAM_CNN_BIAS - free_matrix(kernel->s_d_bias, kernel->columns, output_dim); - free_matrix(kernel->v_d_bias, kernel->columns, output_dim); + free_matrix(kernel->s_d_bias, kernel->columns, output_width); + free_matrix(kernel->v_d_bias, kernel->columns, output_width); #endif for (int i=0; i < kernel->rows; i++) { @@ -184,9 +184,9 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns) gree(kernel->v_d_weights); #endif - free_matrix(input, kernel->rows, input_dim); - free_matrix(output_cpu, kernel->columns, output_dim); - free_matrix(output_gpu, kernel->columns, output_dim); + free_matrix(input, kernel->rows, input_width); + free_matrix(output_cpu, kernel->columns, output_width); + free_matrix(output_gpu, kernel->columns, output_width); }
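
Note on the dimension arithmetic that recurs in the make.c / make.cu hunks above: every pooling function recovers the input size from the output size as `input_width = output_width*stride - 2*padding + size - stride`, which is just the usual `output_width = (input_width - size + 2*padding)/stride + 1` solved for `input_width`; for the stride-1, zero-padding convolutions used in the tests it collapses to `k_size = input_width - output_width + 1`. A minimal, standalone round-trip check of that identity, in plain C and not part of the patch (the helper names below are illustrative only):

```c
#include <assert.h>
#include <stdio.h>

// Width of the output feature map for a window of width `size`,
// moved with `stride`, with `padding` zeros added on each border.
int output_width_of(int input_width, int size, int stride, int padding) {
    return (input_width - size + 2*padding) / stride + 1;
}

// Inverse relation used throughout make.c / make.cu:
// input_width = output_width*stride - 2*padding + size - stride
int input_width_of(int output_width, int size, int stride, int padding) {
    return output_width*stride - 2*padding + size - stride;
}

int main(void) {
    // A few (size, stride, padding, output_width) combinations.
    int cases[][4] = { {2, 2, 0, 14}, {3, 1, 1, 32}, {5, 1, 0, 28} };

    for (int i = 0; i < 3; i++) {
        int size = cases[i][0], stride = cases[i][1];
        int padding = cases[i][2], output_width = cases[i][3];

        int input_width = input_width_of(output_width, size, stride, padding);
        // Going back the other way must land on the same output width.
        assert(output_width_of(input_width, size, stride, padding) == output_width);

        printf("size=%d stride=%d padding=%d : input_width=%d -> output_width=%d\n",
               size, stride, padding, input_width, output_width);
    }
    return 0;
}
```

The two directions agree whenever `input_width - size + 2*padding` is a multiple of `stride`, which is exactly the situation the renamed `input_width` / `output_width` variables describe.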
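`make_dense_linearized_device` sizes its grid with `i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y)`; `i_div_up` is defined elsewhere in the project and is not touched by this patch. Assuming it is the usual ceiling division (an assumption, not something this diff shows), it would look like:

```c
// Assumed behaviour of i_div_up: smallest n such that n*b >= a (ceiling division).
int i_div_up(int a, int b) {
    return (a + b - 1) / b;
}
```

Under that reading, the one-dimensional launch spawns at least `size_output` threads even when `size_output` is not a multiple of the block size, so every output index below `size_output` is covered exactly once.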
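The two test drivers (src/scripts/convolution_benchmark.cu and test/cnn_convolution.cu) build their tensors with `create_matrix(depth, width, width, value)` / `create_empty_matrix(depth, width, width)` and release them with `free_matrix(m, depth, width)`. Those helpers live outside this patch, so the sketch below is only a plausible stand-in that matches the call sites (the fill behaviour in particular is an assumption; the real helper may fill randomly up to the given value). It is shown to make the `[depth][width][width]` layout behind the `output_depth` / `output_width` naming explicit:

```c
#include <stdlib.h>

// Allocates a [depth][rows][columns] tensor and fills it with `value`.
// Illustrative stand-in only: the project's create_matrix may differ.
float*** create_matrix(int depth, int rows, int columns, float value) {
    float*** m = (float***)malloc(depth * sizeof(float**));
    for (int i = 0; i < depth; i++) {
        m[i] = (float**)malloc(rows * sizeof(float*));
        for (int j = 0; j < rows; j++) {
            m[i][j] = (float*)malloc(columns * sizeof(float));
            for (int k = 0; k < columns; k++) {
                m[i][j][k] = value;
            }
        }
    }
    return m;
}

// Frees a tensor allocated above; the innermost width is not needed
// because each row is a single malloc'd block.
void free_matrix(float*** m, int depth, int rows) {
    for (int i = 0; i < depth; i++) {
        for (int j = 0; j < rows; j++) {
            free(m[i][j]);
        }
        free(m[i]);
    }
    free(m);
}

int main(void) {
    // Example with the renamed vocabulary: a bias tensor
    // bias[output_depth][output_width][output_width].
    int output_depth = 3;
    int output_width = 4;

    float*** bias = create_matrix(output_depth, output_width, output_width, 15.0f);
    free_matrix(bias, output_depth, output_width);
    return 0;
}
```

A matching `create_empty_matrix(depth, rows, columns)` would presumably be the same allocation with the fill value fixed at 0.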