Add stride, padding to the backprop of convolution

julienChemillier 2023-05-14 15:21:07 +02:00
parent e186839ec6
commit 46333299bd
4 changed files with 85 additions and 70 deletions
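Before this change, the kernel size was inferred from the tensor shapes as k_size = input_width - output_width + 1, a relation that only holds for a stride of 1 and no padding; the new parameters make the general size relation explicit. A minimal sketch of that relation (the helper name convolution_output_width is ours, not the repo's):

#include <assert.h>

// Output side length of a square convolution, assuming the usual
// floor convention: out = (in + 2*pad - k) / stride + 1.
int convolution_output_width(int input_width, int kernel_size, int padding, int stride) {
    assert(stride >= 1 && kernel_size <= input_width + 2*padding);
    return (input_width + 2*padding - kernel_size) / stride + 1;
}

// With stride == 1 and padding == 0 this inverts to
// kernel_size == input_width - output_width + 1, which is exactly the
// shortcut the removed lines relied on.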

View File

@@ -514,15 +514,15 @@ __global__ void backward_convolution_dbias_kernel(Kernel_cnn* ker, float*** outp
     ker->d_bias[idx][idy][idz] += output[idx][idy][idz];
 }
 
-__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int k_size) {
+__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int kernel_size) {
     int idx = threadIdx.x + blockDim.x*blockIdx.x;
     int idy = threadIdx.y + blockDim.y*blockIdx.y;
     int idz = threadIdx.z + blockDim.z*blockIdx.z;
-    int idz1 = idz / k_size;
-    int idz2 = idz % k_size;
+    int idz1 = idz / kernel_size;
+    int idz2 = idz % kernel_size;
 
-    if (idx >= input_depth || idy >= output_depth || idz1 >= k_size || idz2 >= k_size) {
+    if (idx >= input_depth || idy >= output_depth || idz1 >= kernel_size || idz2 >= kernel_size) {
         return;
     }
@@ -560,23 +560,20 @@ __global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float***
     input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) );
 }
 
-void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
+void backward_convolution_device(Kernel_cnn* kernel, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
     // Bias Kernel
     dim3 gridSize1(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_y));
     dim3 blockSize1(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(ker, output, output_depth, output_width);
+    backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(kernel, output, output_depth, output_width);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 
     // Weights Kernel
-    int k_size = input_width - output_width +1;
-    dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(k_size*k_size, BLOCKSIZE_y));
+    dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(kernel_size*kernel_size, BLOCKSIZE_y));
     dim3 blockSize2(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(ker, input, output, input_depth, output_depth, output_width, k_size);
+    backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(kernel, input, output, input_depth, output_depth, output_width, kernel_size);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
@@ -588,7 +585,7 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
     funcPtr d_function = get_activation_function_cuda(activation);
 
-    backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(ker, input, input_z, output, input_depth, input_width, output_depth, k_size, d_function);
+    backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(kernel, input, input_z, output, input_depth, input_width, output_depth, kernel_size, d_function);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
@@ -597,9 +594,10 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
 #endif
 
-void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
+void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
     funcPtr d_function = get_activation_function(activation);
+    int max_move = kernel_size - padding;
 
     // Bias
     for (int i=0; i < output_depth; i++) {
@@ -610,17 +608,17 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
         }
     }
 
-    // Weights
-    int k_size = input_width - output_width +1;
+    // Weights
     for (int h=0; h < input_depth; h++) {
         for (int i=0; i < output_depth; i++) {
-            for (int j=0; j < k_size; j++) {
-                for (int k=0; k < k_size; k++) {
+            for (int j=-padding; j < max_move; j++) {
+                for (int k=-padding; k < max_move; k++) {
                     float tmp = 0;
                     for (int l=0; l < output_width; l++) {
                         for (int m=0; m < output_width; m++) {
-                            tmp += input[h][l+j][m+k]*output[i][l][m];
+                            if (not_outside(l*stride+j, m*stride+k, 0, input_width)) {
+                                tmp += input[h][l*stride+j][m*stride+k]*output[i][l][m];
+                            }
                         }
                     }
                     ker->d_weights[h][i][j][k] += tmp;
@@ -629,26 +627,35 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
         }
     }
 
-    // Input
+    // Input TODO
     if (is_first==1) // No need to backpropagate into the input
         return;
 
-    int min_m, max_m, min_n, max_n;
     for (int i=0; i < input_depth; i++) {
         for (int j=0; j < input_width; j++) {
             for (int k=0; k < input_width; k++) {
-                float tmp = 0;
-                for (int l=0; l < output_depth; l++) {
-                    min_m = max(0, k_size-1-j);
-                    max_m = min(k_size, input_width - j);
-                    min_n = max(0, k_size-1-k);
-                    max_n = min(k_size, input_width-k);
-                    for (int m=min_m; m < max_m; m++) {
-                        for (int n=min_n; n < max_n; n++) {
-                            tmp += output[l][j-k_size+m+1][k-k_size+n+1]*ker->weights[i][l][m][n];
+                input[i][j][k] = 0;
            }
        }
    }
+    for (int h=0; h < input_depth; h++) {
+        for (int i=0; i < output_depth; i++) {
+            for (int j=-padding; j < max_move; j++) {
+                for (int k=-padding; k < max_move; k++) {
+                    for (int l=0; l < output_width; l++) {
+                        for (int m=0; m < output_width; m++) {
+                            if (not_outside(l*stride+j, m*stride+k, 0, input_width)) {
+                                input[h][l*stride+j][m*stride+k] += output[i][l][m]*ker->weights[h][i][j+padding][k+padding];
+                            }
+                        }
+                    }
+                }
+            }
-                input[i][j][k] = tmp*d_function(input_z[i][j][k]);
        }
    }
+    for (int i=0; i < input_depth; i++) {
+        for (int j=0; j < input_width; j++) {
+            for (int k=0; k < input_width; k++) {
+                input[i][j][k] = input[i][j][k]*d_function(input_z[i][j][k]);
+            }
+        }
+    }
@@ -657,10 +664,10 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
 #ifdef __CUDACC__
 extern "C"
 #endif
-void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
+void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
     #ifndef __CUDACC__
-        backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first);
+        backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first, kernel_size, padding, stride);
     #else
-        backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first);
+        backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first, kernel_size, padding, stride);
     #endif
 }
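The new loops guard every access with not_outside, a helper that is not part of this diff. A plausible definition, assuming it only checks that both coordinates lie in [lower, upper) (our guess, not the repo's code):

// Hypothetical bounds check matching the call sites above:
// true when both indices are inside [lower, upper).
static inline int not_outside(int x, int y, int lower, int upper) {
    return x >= lower && x < upper && y >= lower && y < upper;
}

Reads that fail this test fall into the zero padding, so skipping them is equivalent to multiplying by an implicit zero.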

View File

@@ -514,15 +514,15 @@ __global__ void backward_convolution_dbias_kernel(Kernel_cnn* ker, float*** outp
     ker->d_bias[idx][idy][idz] += output[idx][idy][idz];
 }
 
-__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int k_size) {
+__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int kernel_size) {
     int idx = threadIdx.x + blockDim.x*blockIdx.x;
     int idy = threadIdx.y + blockDim.y*blockIdx.y;
     int idz = threadIdx.z + blockDim.z*blockIdx.z;
-    int idz1 = idz / k_size;
-    int idz2 = idz % k_size;
+    int idz1 = idz / kernel_size;
+    int idz2 = idz % kernel_size;
 
-    if (idx >= input_depth || idy >= output_depth || idz1 >= k_size || idz2 >= k_size) {
+    if (idx >= input_depth || idy >= output_depth || idz1 >= kernel_size || idz2 >= kernel_size) {
         return;
     }
@@ -560,23 +560,20 @@ __global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float***
     input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) );
 }
 
-void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
+void backward_convolution_device(Kernel_cnn* kernel, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
     // Bias Kernel
     dim3 gridSize1(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_y));
     dim3 blockSize1(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(ker, output, output_depth, output_width);
+    backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(kernel, output, output_depth, output_width);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 
     // Weights Kernel
-    int k_size = input_width - output_width +1;
-    dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(k_size*k_size, BLOCKSIZE_y));
+    dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(kernel_size*kernel_size, BLOCKSIZE_y));
     dim3 blockSize2(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(ker, input, output, input_depth, output_depth, output_width, k_size);
+    backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(kernel, input, output, input_depth, output_depth, output_width, kernel_size);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
@@ -588,7 +585,7 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
     funcPtr d_function = get_activation_function_cuda(activation);
 
-    backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(ker, input, input_z, output, input_depth, input_width, output_depth, k_size, d_function);
+    backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(kernel, input, input_z, output, input_depth, input_width, output_depth, kernel_size, d_function);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
@@ -597,9 +594,10 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
 #endif
 
-void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
+void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
     funcPtr d_function = get_activation_function(activation);
+    int max_move = kernel_size - padding;
 
     // Bias
     for (int i=0; i < output_depth; i++) {
@@ -610,17 +608,17 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
         }
     }
 
-    // Weights
-    int k_size = input_width - output_width +1;
+    // Weights
     for (int h=0; h < input_depth; h++) {
         for (int i=0; i < output_depth; i++) {
-            for (int j=0; j < k_size; j++) {
-                for (int k=0; k < k_size; k++) {
+            for (int j=-padding; j < max_move; j++) {
+                for (int k=-padding; k < max_move; k++) {
                     float tmp = 0;
                     for (int l=0; l < output_width; l++) {
                         for (int m=0; m < output_width; m++) {
-                            tmp += input[h][l+j][m+k]*output[i][l][m];
+                            if (not_outside(l*stride+j, m*stride+k, 0, input_width)) {
+                                tmp += input[h][l*stride+j][m*stride+k]*output[i][l][m];
+                            }
                         }
                     }
                     ker->d_weights[h][i][j][k] += tmp;
@@ -629,26 +627,35 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
         }
     }
 
-    // Input
+    // Input TODO
     if (is_first==1) // No need to backpropagate into the input
         return;
 
-    int min_m, max_m, min_n, max_n;
     for (int i=0; i < input_depth; i++) {
         for (int j=0; j < input_width; j++) {
            for (int k=0; k < input_width; k++) {
-                float tmp = 0;
-                for (int l=0; l < output_depth; l++) {
-                    min_m = max(0, k_size-1-j);
-                    max_m = min(k_size, input_width - j);
-                    min_n = max(0, k_size-1-k);
-                    max_n = min(k_size, input_width-k);
-                    for (int m=min_m; m < max_m; m++) {
-                        for (int n=min_n; n < max_n; n++) {
-                            tmp += output[l][j-k_size+m+1][k-k_size+n+1]*ker->weights[i][l][m][n];
+                input[i][j][k] = 0;
            }
        }
    }
+    for (int h=0; h < input_depth; h++) {
+        for (int i=0; i < output_depth; i++) {
+            for (int j=-padding; j < max_move; j++) {
+                for (int k=-padding; k < max_move; k++) {
+                    for (int l=0; l < output_width; l++) {
+                        for (int m=0; m < output_width; m++) {
+                            if (not_outside(l*stride+j, m*stride+k, 0, input_width)) {
+                                input[h][l*stride+j][m*stride+k] += output[i][l][m]*ker->weights[h][i][j+padding][k+padding];
+                            }
+                        }
+                    }
+                }
+            }
-                input[i][j][k] = tmp*d_function(input_z[i][j][k]);
        }
    }
+    for (int i=0; i < input_depth; i++) {
+        for (int j=0; j < input_width; j++) {
+            for (int k=0; k < input_width; k++) {
+                input[i][j][k] = input[i][j][k]*d_function(input_z[i][j][k]);
+            }
+        }
+    }
@@ -657,10 +664,10 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
 #ifdef __CUDACC__
 extern "C"
 #endif
-void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
+void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
     #ifndef __CUDACC__
-        backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first);
+        backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first, kernel_size, padding, stride);
     #else
-        backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first);
+        backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first, kernel_size, padding, stride);
     #endif
 }
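As a sanity check on the input-gradient rule above (input[l*stride+j] accumulates output[l]*weights[j+padding]), here is a self-contained 1D finite-difference test. It is our own sketch, independent of the repo's code, and the loss is simply the sum of all convolution outputs, so the analytic and numeric gradients must agree:

#include <stdio.h>

/* Sizes follow the usual relation out = (in + 2*pad - k)/stride + 1. */
#define IN 7
#define K 3
#define PAD 1
#define STRIDE 2
#define OUT ((IN + 2*PAD - K) / STRIDE + 1)   /* = 4 */

/* L = sum of all convolution outputs; reads into the padding are 0. */
static float loss(const float in[IN], const float w[K]) {
    float l = 0.0f;
    for (int o = 0; o < OUT; o++)
        for (int j = 0; j < K; j++) {
            int p = o*STRIDE + j - PAD;
            if (p >= 0 && p < IN)
                l += w[j] * in[p];
        }
    return l;
}

int main(void) {
    float in[IN] = {1, 2, 3, 4, 5, 6, 7};
    float w[K]   = {0.5f, -1.0f, 2.0f};

    /* Analytic input gradient: every window touching position p
       contributes its weight, mirroring the += rule in the diff. */
    float grad[IN] = {0};
    for (int o = 0; o < OUT; o++)
        for (int j = 0; j < K; j++) {
            int p = o*STRIDE + j - PAD;
            if (p >= 0 && p < IN)
                grad[p] += w[j];
        }

    /* Central finite differences must agree with grad[]. */
    for (int p = 0; p < IN; p++) {
        const float eps = 1e-2f;
        float save = in[p];
        in[p] = save + eps; float lp = loss(in, w);
        in[p] = save - eps; float lm = loss(in, w);
        in[p] = save;
        printf("p=%d analytic=%.4f numeric=%.4f\n", p, grad[p], (lp - lm) / (2*eps));
    }
    return 0;
}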

View File

@@ -263,7 +263,8 @@ void backward_propagation(Network* network, int wanted_number) {
         if (k_i->cnn) { // Convolution
-            backward_convolution(k_i->cnn, input, input_z, output, input_depth, input_width, output_depth, output_width, -activation, is_last_layer);
+            int kernel_size = k_i->cnn->k_size;
+            backward_convolution(k_i->cnn, input, input_z, output, input_depth, input_width, output_depth, output_width, -activation, is_last_layer, kernel_size, padding, stride);
         } else if (k_i->nn) { // Full connection
             if (k_i->linearisation == DOESNT_LINEARISE) { // Vector -> Vector
                 backward_dense(k_i->nn, input[0][0], input_z[0][0], output[0][0], input_width, output_width, -activation, is_last_layer);
View File

@@ -68,6 +68,6 @@ extern "C"
 /*
 * Transfers the error information through a convolution layer
 */
-void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first);
+void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride);
 
 #endif
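To make the declared routine's weight-gradient rule concrete, here is a single-channel standalone version of the CPU loop (a sketch under our own naming, not the repo's API). Note that it accumulates into dw at offset (j+padding, k+padding) so the array index stays inside [0, kernel_size):

/* Single-channel 2D weight gradient with stride and padding, mirroring
 * the CPU loop in the diff. `in` is input_width x input_width, `dout`
 * is output_width x output_width (the incoming gradient), and `dw` is
 * kernel_size x kernel_size, all row-major. */
void conv_dweights_2d(const float *in, const float *dout, float *dw,
                      int input_width, int output_width,
                      int kernel_size, int padding, int stride) {
    int max_move = kernel_size - padding;            /* as in the diff */
    for (int j = -padding; j < max_move; j++) {
        for (int k = -padding; k < max_move; k++) {
            float tmp = 0.0f;
            for (int l = 0; l < output_width; l++) {
                for (int m = 0; m < output_width; m++) {
                    int x = l*stride + j, y = m*stride + k;
                    /* skip reads that fall into the zero padding */
                    if (x >= 0 && x < input_width && y >= 0 && y < input_width)
                        tmp += in[x*input_width + y] * dout[l*output_width + m];
                }
            }
            /* offset by padding so the index is always in range */
            dw[(j + padding)*kernel_size + (k + padding)] += tmp;
        }
    }
}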