diff --git a/src/cnn/backpropagation.c b/src/cnn/backpropagation.c
index a084066..12a231c 100644
--- a/src/cnn/backpropagation.c
+++ b/src/cnn/backpropagation.c
@@ -514,15 +514,15 @@ __global__ void backward_convolution_dbias_kernel(Kernel_cnn* ker, float*** outp
     ker->d_bias[idx][idy][idz] += output[idx][idy][idz];
 }
 
-__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int k_size) {
+__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int kernel_size) {
     int idx = threadIdx.x + blockDim.x*blockIdx.x;
     int idy = threadIdx.y + blockDim.y*blockIdx.y;
     int idz = threadIdx.z + blockDim.z*blockIdx.z;
-    int idz1 = idz / k_size;
-    int idz2 = idz % k_size;
+    int idz1 = idz / kernel_size;
+    int idz2 = idz % kernel_size;
 
-    if (idx >= input_depth || idy >= output_depth || idz1 >= k_size || idz2 >= k_size) {
+    if (idx >= input_depth || idy >= output_depth || idz1 >= kernel_size || idz2 >= kernel_size) {
         return;
     }
 
@@ -560,23 +560,20 @@ __global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float***
     input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) );
 }
 
-void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
+void backward_convolution_device(Kernel_cnn* kernel, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
     // Bias Kernel
     dim3 gridSize1(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_y));
     dim3 blockSize1(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(ker, output, output_depth, output_width);
+    backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(kernel, output, output_depth, output_width);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 
-    // Weights Kernel
-    int k_size = input_width - output_width +1;
-
-    dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(k_size*k_size, BLOCKSIZE_y));
+    dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(kernel_size*kernel_size, BLOCKSIZE_y));
     dim3 blockSize2(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(ker, input, output, input_depth, output_depth, output_width, k_size);
+    backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(kernel, input, output, input_depth, output_depth, output_width, kernel_size);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 
@@ -588,7 +585,7 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
 
     funcPtr d_function = get_activation_function_cuda(activation);
 
-    backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(ker, input, input_z, output, input_depth, input_width, output_depth, k_size, d_function);
+    backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(kernel, input, input_z, output, input_depth, input_width, output_depth, kernel_size, d_function);
 
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
@@ -597,9 +594,10 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
 #endif
 
-void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
+void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
     funcPtr d_function = get_activation_function(activation);
+    int max_move = kernel_size - padding;
 
     // Bias
     for (int i=0; i < output_depth; i++) {
@@ -610,17 +608,17 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
         }
     }
 
-    // Weights
-    int k_size = input_width - output_width +1;
-
+    // Weights
     for (int h=0; h < input_depth; h++) {
         for (int i=0; i < output_depth; i++) {
-            for (int j=0; j < k_size; j++) {
-                for (int k=0; k < k_size; k++) {
+            for (int j=-padding; j < max_move; j++) {
+                for (int k=-padding; k < max_move; k++) {
                     float tmp = 0;
                     for (int l=0; l < output_width; l++) {
                         for (int m=0; m < output_width; m++) {
-                            tmp += input[h][l+j][m+k]*output[i][l][m];
+                            if (not_outside(l*stride+j, m*stride+k, 0, input_width)) {
+                                tmp += input[h][l*stride+j][m*stride+k]*output[i][l][m];
+                            }
                         }
                     }
                     ker->d_weights[h][i][j][k] += tmp;
@@ -629,26 +627,35 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
         }
     }
 
-    // Input
+    // Input TODO
     if (is_first==1) // No need to backpropagate into the input
         return;
-    int min_m, max_m, min_n, max_n;
     for (int i=0; i < input_depth; i++) {
         for (int j=0; j < input_width; j++) {
             for (int k=0; k < input_width; k++) {
-                float tmp = 0;
-                for (int l=0; l < output_depth; l++) {
-                    min_m = max(0, k_size-1-j);
-                    max_m = min(k_size, input_width - j);
-                    min_n = max(0, k_size-1-k);
-                    max_n = min(k_size, input_width-k);
-                    for (int m=min_m; m < max_m; m++) {
-                        for (int n=min_n; n < max_n; n++) {
-                            tmp += output[l][j-k_size+m+1][k-k_size+n+1]*ker->weights[i][l][m][n];
+                input[i][j][k] = 0;
+            }
+        }
+    }
+    for (int h=0; h < input_depth; h++) {
+        for (int i=0; i < output_depth; i++) {
+            for (int j=-padding; j < max_move; j++) {
+                for (int k=-padding; k < max_move; k++) {
+                    for (int l=0; l < output_width; l++) {
+                        for (int m=0; m < output_width; m++) {
+                            if (not_outside(l*stride+j, m*stride+k, 0, input_width)) {
+                                input[h][l*stride+j][m*stride+k] += output[i][l][m]*ker->weights[h][i][j+padding][k+padding];
+                            }
                         }
                     }
                 }
-                input[i][j][k] = tmp*d_function(input_z[i][j][k]);
+            }
+        }
+    }
+    for (int i=0; i < input_depth; i++) {
+        for (int j=0; j < input_width; j++) {
+            for (int k=0; k < input_width; k++) {
+                input[i][j][k] = input[i][j][k]*d_function(input_z[i][j][k]);
             }
         }
     }
@@ -657,10 +664,10 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
 #ifdef __CUDACC__
 extern "C"
 #endif
-void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
+void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
     #ifndef __CUDACC__
-    backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first);
+    backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first, kernel_size, padding, stride);
     #else
-    backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first);
+    backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first, kernel_size, padding, stride);
     #endif
 }
\ No newline at end of file
diff --git a/src/cnn/backpropagation.cu b/src/cnn/backpropagation.cu
index a084066..12a231c 100644
--- a/src/cnn/backpropagation.cu
+++ b/src/cnn/backpropagation.cu
@@ -514,15 +514,15 @@ __global__ void backward_convolution_dbias_kernel(Kernel_cnn* ker, float*** outp
     ker->d_bias[idx][idy][idz] += output[idx][idy][idz];
 }
 
-__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int k_size) {
+__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int kernel_size) {
     int idx = threadIdx.x + blockDim.x*blockIdx.x;
     int idy = threadIdx.y + blockDim.y*blockIdx.y;
     int idz = threadIdx.z + blockDim.z*blockIdx.z;
-    int idz1 = idz / k_size;
-    int idz2 = idz % k_size;
+    int idz1 = idz / kernel_size;
+    int idz2 = idz % kernel_size;
 
-    if (idx >= input_depth || idy >= output_depth || idz1 >= k_size || idz2 >= k_size) {
+    if (idx >= input_depth || idy >= output_depth || idz1 >= kernel_size || idz2 >= kernel_size) {
         return;
     }
 
@@ -560,23 +560,20 @@ __global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float***
     input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) );
 }
 
-void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
+void backward_convolution_device(Kernel_cnn* kernel, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
     // Bias Kernel
     dim3 gridSize1(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_y));
     dim3 blockSize1(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(ker, output, output_depth, output_width);
+    backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(kernel, output, output_depth, output_width);
     gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
 
-    // Weights Kernel
-    int k_size = input_width - output_width +1;
-
-    dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(k_size*k_size, BLOCKSIZE_y));
+    dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(kernel_size*kernel_size, BLOCKSIZE_y));
     dim3 blockSize2(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(ker, input, output, input_depth, output_depth, output_width, k_size);
+    backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(kernel, input, output, input_depth, output_depth, output_width, kernel_size);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 
@@ -588,7 +585,7 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
 
     funcPtr d_function = get_activation_function_cuda(activation);
 
-    backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(ker, input, input_z, output, input_depth, input_width, output_depth, k_size, d_function);
+    backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(kernel, input, input_z, output, input_depth, input_width, output_depth, kernel_size, d_function);
 
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
@@ -597,9 +594,10 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
 #endif
 
-void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
+void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
     funcPtr d_function = get_activation_function(activation);
+    int max_move = kernel_size - padding;
 
     // Bias
     for (int i=0; i < output_depth; i++) {
@@ -610,17 +608,17 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
         }
     }
 
-    // Weights
-    int k_size = input_width - output_width +1;
-
+    // Weights
     for (int h=0; h < input_depth; h++) {
         for (int i=0; i < output_depth; i++) {
-            for (int j=0; j < k_size; j++) {
-                for (int k=0; k < k_size; k++) {
+            for (int j=-padding; j < max_move; j++) {
+                for (int k=-padding; k < max_move; k++) {
                     float tmp = 0;
                     for (int l=0; l < output_width; l++) {
                         for (int m=0; m < output_width; m++) {
-                            tmp += input[h][l+j][m+k]*output[i][l][m];
+                            if (not_outside(l*stride+j, m*stride+k, 0, input_width)) {
+                                tmp += input[h][l*stride+j][m*stride+k]*output[i][l][m];
+                            }
                         }
                     }
                     ker->d_weights[h][i][j][k] += tmp;
@@ -629,26 +627,35 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
         }
     }
 
-    // Input
+    // Input TODO
     if (is_first==1) // No need to backpropagate into the input
         return;
-    int min_m, max_m, min_n, max_n;
     for (int i=0; i < input_depth; i++) {
         for (int j=0; j < input_width; j++) {
             for (int k=0; k < input_width; k++) {
-                float tmp = 0;
-                for (int l=0; l < output_depth; l++) {
-                    min_m = max(0, k_size-1-j);
-                    max_m = min(k_size, input_width - j);
-                    min_n = max(0, k_size-1-k);
-                    max_n = min(k_size, input_width-k);
-                    for (int m=min_m; m < max_m; m++) {
-                        for (int n=min_n; n < max_n; n++) {
-                            tmp += output[l][j-k_size+m+1][k-k_size+n+1]*ker->weights[i][l][m][n];
+                input[i][j][k] = 0;
+            }
+        }
+    }
+    for (int h=0; h < input_depth; h++) {
+        for (int i=0; i < output_depth; i++) {
+            for (int j=-padding; j < max_move; j++) {
+                for (int k=-padding; k < max_move; k++) {
+                    for (int l=0; l < output_width; l++) {
+                        for (int m=0; m < output_width; m++) {
+                            if (not_outside(l*stride+j, m*stride+k, 0, input_width)) {
+                                input[h][l*stride+j][m*stride+k] += output[i][l][m]*ker->weights[h][i][j+padding][k+padding];
+                            }
                        }
                     }
                 }
-                input[i][j][k] = tmp*d_function(input_z[i][j][k]);
+            }
+        }
+    }
+    for (int i=0; i < input_depth; i++) {
+        for (int j=0; j < input_width; j++) {
+            for (int k=0; k < input_width; k++) {
+                input[i][j][k] = input[i][j][k]*d_function(input_z[i][j][k]);
             }
         }
     }
@@ -657,10 +664,10 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
 #ifdef __CUDACC__
 extern "C"
 #endif
-void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
+void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
     #ifndef __CUDACC__
-    backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first);
+    backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first, kernel_size, padding, stride);
     #else
-    backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first);
+    backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first, kernel_size, padding, stride);
     #endif
 }
\ No newline at end of file
diff --git a/src/cnn/cnn.c b/src/cnn/cnn.c
index 520ea0b..43ad751 100644
--- a/src/cnn/cnn.c
+++ b/src/cnn/cnn.c
@@ -263,7 +263,8 @@ void backward_propagation(Network* network, int wanted_number) {
 
         if (k_i->cnn) { // Convolution
-            backward_convolution(k_i->cnn, input, input_z, output, input_depth, input_width, output_depth, output_width, -activation, is_last_layer);
+            int kernel_size = k_i->cnn->k_size;
+            backward_convolution(k_i->cnn, input, input_z, output, input_depth, input_width, output_depth, output_width, -activation, is_last_layer, kernel_size, padding, stride);
         } else if (k_i->nn) { // Full connection
             if (k_i->linearisation == DOESNT_LINEARISE) { // Vector -> Vector
                 backward_dense(k_i->nn, input[0][0], input_z[0][0], output[0][0], input_width, output_width, -activation, is_last_layer);
diff --git a/src/cnn/include/backpropagation.h b/src/cnn/include/backpropagation.h
index b723b3d..049cc9e 100644
--- a/src/cnn/include/backpropagation.h
+++ b/src/cnn/include/backpropagation.h
@@ -68,6 +68,6 @@ extern "C"
 /*
 * Propagates error information through a convolution layer
 */
-void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first);
+void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride);
 
 #endif
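
Two notes on the patch above. First, kernel_size must now be passed explicitly: the old recovery k_size = input_width - output_width + 1 only holds for stride 1 and no padding, whereas with padding and stride the general relation is output_width = (input_width - kernel_size + 2*padding) / stride + 1. Second, the new bounds check not_outside is defined elsewhere in the repository and is not part of this patch; a minimal sketch, assuming only what its call sites here imply (two coordinates tested against the half-open range [lower, upper)), could look like:

    /* Hedged sketch only: the actual helper lives elsewhere in the repo and
     * may differ. Returns 1 when both coordinates x and y fall inside the
     * half-open valid index range [lower, upper), 0 otherwise. This matches
     * the calls not_outside(l*stride+j, m*stride+k, 0, input_width), which
     * skip contributions that a padded window would read outside the input. */
    static inline int not_outside(int x, int y, int lower, int upper) {
        return !(x < lower || y < lower || x >= upper || y >= upper);
    }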