Add stride, padding to the backprop of convolution

This commit is contained in:
julienChemillier 2023-05-14 15:21:07 +02:00
parent e186839ec6
commit 46333299bd
4 changed files with 85 additions and 70 deletions

View File

@ -514,15 +514,15 @@ __global__ void backward_convolution_dbias_kernel(Kernel_cnn* ker, float*** outp
ker->d_bias[idx][idy][idz] += output[idx][idy][idz]; ker->d_bias[idx][idy][idz] += output[idx][idy][idz];
} }
__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int k_size) { __global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int kernel_size) {
int idx = threadIdx.x + blockDim.x*blockIdx.x; int idx = threadIdx.x + blockDim.x*blockIdx.x;
int idy = threadIdx.y + blockDim.y*blockIdx.y; int idy = threadIdx.y + blockDim.y*blockIdx.y;
int idz = threadIdx.z + blockDim.z*blockIdx.z; int idz = threadIdx.z + blockDim.z*blockIdx.z;
int idz1 = idz / k_size; int idz1 = idz / kernel_size;
int idz2 = idz % k_size; int idz2 = idz % kernel_size;
if (idx >= input_depth || idy >= output_depth || idz1 >= k_size || idz2 >= k_size) { if (idx >= input_depth || idy >= output_depth || idz1 >= kernel_size || idz2 >= kernel_size) {
return; return;
} }
@ -560,23 +560,20 @@ __global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float***
input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) ); input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) );
} }
void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) { void backward_convolution_device(Kernel_cnn* kernel, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
// Bias Kernel // Bias Kernel
dim3 gridSize1(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_y)); dim3 gridSize1(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_y));
dim3 blockSize1(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); dim3 blockSize1(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(ker, output, output_depth, output_width); backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(kernel, output, output_depth, output_width);
gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() ); gpuErrchk( cudaDeviceSynchronize() );
// Weights Kernel dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(kernel_size*kernel_size, BLOCKSIZE_y));
int k_size = input_width - output_width +1;
dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(k_size*k_size, BLOCKSIZE_y));
dim3 blockSize2(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); dim3 blockSize2(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(ker, input, output, input_depth, output_depth, output_width, k_size); backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(kernel, input, output, input_depth, output_depth, output_width, kernel_size);
gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() ); gpuErrchk( cudaDeviceSynchronize() );
@ -588,7 +585,7 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
funcPtr d_function = get_activation_function_cuda(activation); funcPtr d_function = get_activation_function_cuda(activation);
backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(ker, input, input_z, output, input_depth, input_width, output_depth, k_size, d_function); backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(kernel, input, input_z, output, input_depth, input_width, output_depth, kernel_size, d_function);
gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() ); gpuErrchk( cudaDeviceSynchronize() );
@ -597,9 +594,10 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
#endif #endif
void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) { void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
funcPtr d_function = get_activation_function(activation); funcPtr d_function = get_activation_function(activation);
int max_move = kernel_size - padding;
// Bias // Bias
for (int i=0; i < output_depth; i++) { for (int i=0; i < output_depth; i++) {
@ -611,16 +609,16 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
} }
// Weights // Weights
int k_size = input_width - output_width +1;
for (int h=0; h < input_depth; h++) { for (int h=0; h < input_depth; h++) {
for (int i=0; i < output_depth; i++) { for (int i=0; i < output_depth; i++) {
for (int j=0; j < k_size; j++) { for (int j=-padding; j < max_move; j++) {
for (int k=0; k < k_size; k++) { for (int k=-padding; k < max_move; k++) {
float tmp = 0; float tmp = 0;
for (int l=0; l < output_width; l++) { for (int l=0; l < output_width; l++) {
for (int m=0; m < output_width; m++) { for (int m=0; m < output_width; m++) {
tmp += input[h][l+j][m+k]*output[i][l][m]; if (not_outside(l*stride+j, m*stride+k, 0, input_width)) {
tmp += input[h][l*stride+j][m*stride+k]*output[i][l][m];
}
} }
} }
ker->d_weights[h][i][j][k] += tmp; ker->d_weights[h][i][j][k] += tmp;
@ -629,26 +627,35 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
} }
} }
// Input // Input TODO
if (is_first==1) // Pas besoin de backpropager dans l'input if (is_first==1) // Pas besoin de backpropager dans l'input
return; return;
int min_m, max_m, min_n, max_n;
for (int i=0; i < input_depth; i++) { for (int i=0; i < input_depth; i++) {
for (int j=0; j < input_width; j++) { for (int j=0; j < input_width; j++) {
for (int k=0; k < input_width; k++) { for (int k=0; k < input_width; k++) {
float tmp = 0; input[i][j][k] = 0;
for (int l=0; l < output_depth; l++) {
min_m = max(0, k_size-1-j);
max_m = min(k_size, input_width - j);
min_n = max(0, k_size-1-k);
max_n = min(k_size, input_width-k);
for (int m=min_m; m < max_m; m++) {
for (int n=min_n; n < max_n; n++) {
tmp += output[l][j-k_size+m+1][k-k_size+n+1]*ker->weights[i][l][m][n];
} }
} }
} }
input[i][j][k] = tmp*d_function(input_z[i][j][k]); for (int h=0; h < input_depth; h++) {
for (int i=0; i < output_depth; i++) {
for (int j=-padding; j < max_move; j++) {
for (int k=-padding; k < max_move; k++) {
for (int l=0; l < output_width; l++) {
for (int m=0; m < output_width; m++) {
if (not_outside(l*stride+j, m*stride+k, 0, input_width)) {
input[h][l*stride+j][m*stride+k] += output[i][l][m]*ker->weights[h][i][j+padding][k+padding];
}
}
}
}
}
}
}
for (int i=0; i < input_depth; i++) {
for (int j=0; j < input_width; j++) {
for (int k=0; k < input_width; k++) {
input[i][j][k] = input[i][j][k]*d_function(input_z[i][j][k]);
} }
} }
} }
@ -657,10 +664,10 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
#ifdef __CUDACC__ #ifdef __CUDACC__
extern "C" extern "C"
#endif #endif
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) { void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
#ifndef __CUDACC__ #ifndef __CUDACC__
backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first); backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first, kernel_size, padding, stride);
#else #else
backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first); backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first, kernel_size, padding, stride);
#endif #endif
} }

View File

@ -514,15 +514,15 @@ __global__ void backward_convolution_dbias_kernel(Kernel_cnn* ker, float*** outp
ker->d_bias[idx][idy][idz] += output[idx][idy][idz]; ker->d_bias[idx][idy][idz] += output[idx][idy][idz];
} }
__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int k_size) { __global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int kernel_size) {
int idx = threadIdx.x + blockDim.x*blockIdx.x; int idx = threadIdx.x + blockDim.x*blockIdx.x;
int idy = threadIdx.y + blockDim.y*blockIdx.y; int idy = threadIdx.y + blockDim.y*blockIdx.y;
int idz = threadIdx.z + blockDim.z*blockIdx.z; int idz = threadIdx.z + blockDim.z*blockIdx.z;
int idz1 = idz / k_size; int idz1 = idz / kernel_size;
int idz2 = idz % k_size; int idz2 = idz % kernel_size;
if (idx >= input_depth || idy >= output_depth || idz1 >= k_size || idz2 >= k_size) { if (idx >= input_depth || idy >= output_depth || idz1 >= kernel_size || idz2 >= kernel_size) {
return; return;
} }
@ -560,23 +560,20 @@ __global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float***
input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) ); input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) );
} }
void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) { void backward_convolution_device(Kernel_cnn* kernel, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
// Bias Kernel // Bias Kernel
dim3 gridSize1(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_y)); dim3 gridSize1(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_y));
dim3 blockSize1(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); dim3 blockSize1(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(ker, output, output_depth, output_width); backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(kernel, output, output_depth, output_width);
gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() ); gpuErrchk( cudaDeviceSynchronize() );
// Weights Kernel dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(kernel_size*kernel_size, BLOCKSIZE_y));
int k_size = input_width - output_width +1;
dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(k_size*k_size, BLOCKSIZE_y));
dim3 blockSize2(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); dim3 blockSize2(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(ker, input, output, input_depth, output_depth, output_width, k_size); backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(kernel, input, output, input_depth, output_depth, output_width, kernel_size);
gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() ); gpuErrchk( cudaDeviceSynchronize() );
@ -588,7 +585,7 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
funcPtr d_function = get_activation_function_cuda(activation); funcPtr d_function = get_activation_function_cuda(activation);
backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(ker, input, input_z, output, input_depth, input_width, output_depth, k_size, d_function); backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(kernel, input, input_z, output, input_depth, input_width, output_depth, kernel_size, d_function);
gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() ); gpuErrchk( cudaDeviceSynchronize() );
@ -597,9 +594,10 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
#endif #endif
void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) { void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
funcPtr d_function = get_activation_function(activation); funcPtr d_function = get_activation_function(activation);
int max_move = kernel_size - padding;
// Bias // Bias
for (int i=0; i < output_depth; i++) { for (int i=0; i < output_depth; i++) {
@ -611,16 +609,16 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
} }
// Weights // Weights
int k_size = input_width - output_width +1;
for (int h=0; h < input_depth; h++) { for (int h=0; h < input_depth; h++) {
for (int i=0; i < output_depth; i++) { for (int i=0; i < output_depth; i++) {
for (int j=0; j < k_size; j++) { for (int j=-padding; j < max_move; j++) {
for (int k=0; k < k_size; k++) { for (int k=-padding; k < max_move; k++) {
float tmp = 0; float tmp = 0;
for (int l=0; l < output_width; l++) { for (int l=0; l < output_width; l++) {
for (int m=0; m < output_width; m++) { for (int m=0; m < output_width; m++) {
tmp += input[h][l+j][m+k]*output[i][l][m]; if (not_outside(l*stride+j, m*stride+k, 0, input_width)) {
tmp += input[h][l*stride+j][m*stride+k]*output[i][l][m];
}
} }
} }
ker->d_weights[h][i][j][k] += tmp; ker->d_weights[h][i][j][k] += tmp;
@ -629,26 +627,35 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
} }
} }
// Input // Input TODO
if (is_first==1) // Pas besoin de backpropager dans l'input if (is_first==1) // Pas besoin de backpropager dans l'input
return; return;
int min_m, max_m, min_n, max_n;
for (int i=0; i < input_depth; i++) { for (int i=0; i < input_depth; i++) {
for (int j=0; j < input_width; j++) { for (int j=0; j < input_width; j++) {
for (int k=0; k < input_width; k++) { for (int k=0; k < input_width; k++) {
float tmp = 0; input[i][j][k] = 0;
for (int l=0; l < output_depth; l++) {
min_m = max(0, k_size-1-j);
max_m = min(k_size, input_width - j);
min_n = max(0, k_size-1-k);
max_n = min(k_size, input_width-k);
for (int m=min_m; m < max_m; m++) {
for (int n=min_n; n < max_n; n++) {
tmp += output[l][j-k_size+m+1][k-k_size+n+1]*ker->weights[i][l][m][n];
} }
} }
} }
input[i][j][k] = tmp*d_function(input_z[i][j][k]); for (int h=0; h < input_depth; h++) {
for (int i=0; i < output_depth; i++) {
for (int j=-padding; j < max_move; j++) {
for (int k=-padding; k < max_move; k++) {
for (int l=0; l < output_width; l++) {
for (int m=0; m < output_width; m++) {
if (not_outside(l*stride+j, m*stride+k, 0, input_width)) {
input[h][l*stride+j][m*stride+k] += output[i][l][m]*ker->weights[h][i][j+padding][k+padding];
}
}
}
}
}
}
}
for (int i=0; i < input_depth; i++) {
for (int j=0; j < input_width; j++) {
for (int k=0; k < input_width; k++) {
input[i][j][k] = input[i][j][k]*d_function(input_z[i][j][k]);
} }
} }
} }
@ -657,10 +664,10 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
#ifdef __CUDACC__ #ifdef __CUDACC__
extern "C" extern "C"
#endif #endif
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) { void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
#ifndef __CUDACC__ #ifndef __CUDACC__
backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first); backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first, kernel_size, padding, stride);
#else #else
backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first); backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first, kernel_size, padding, stride);
#endif #endif
} }

View File

@ -263,7 +263,8 @@ void backward_propagation(Network* network, int wanted_number) {
if (k_i->cnn) { // Convolution if (k_i->cnn) { // Convolution
backward_convolution(k_i->cnn, input, input_z, output, input_depth, input_width, output_depth, output_width, -activation, is_last_layer); int kernel_size = k_i->cnn->k_size;
backward_convolution(k_i->cnn, input, input_z, output, input_depth, input_width, output_depth, output_width, -activation, is_last_layer, kernel_size, padding, stride);
} else if (k_i->nn) { // Full connection } else if (k_i->nn) { // Full connection
if (k_i->linearisation == DOESNT_LINEARISE) { // Vecteur -> Vecteur if (k_i->linearisation == DOESNT_LINEARISE) { // Vecteur -> Vecteur
backward_dense(k_i->nn, input[0][0], input_z[0][0], output[0][0], input_width, output_width, -activation, is_last_layer); backward_dense(k_i->nn, input[0][0], input_z[0][0], output[0][0], input_width, output_width, -activation, is_last_layer);

View File

@ -68,6 +68,6 @@ extern "C"
/* /*
* Transfert les informations d'erreur à travers un couche de convolution * Transfert les informations d'erreur à travers un couche de convolution
*/ */
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first); void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride);
#endif #endif