mirror of
https://github.com/augustin64/projet-tipe
synced 2025-01-23 15:16:26 +01:00
Add stride, padding to the backprop of convolution
This commit is contained in:
parent
e186839ec6
commit
46333299bd
@ -514,15 +514,15 @@ __global__ void backward_convolution_dbias_kernel(Kernel_cnn* ker, float*** outp
|
||||
ker->d_bias[idx][idy][idz] += output[idx][idy][idz];
|
||||
}
|
||||
|
||||
__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int k_size) {
|
||||
__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int kernel_size) {
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int idy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
int idz = threadIdx.z + blockDim.z*blockIdx.z;
|
||||
|
||||
int idz1 = idz / k_size;
|
||||
int idz2 = idz % k_size;
|
||||
int idz1 = idz / kernel_size;
|
||||
int idz2 = idz % kernel_size;
|
||||
|
||||
if (idx >= input_depth || idy >= output_depth || idz1 >= k_size || idz2 >= k_size) {
|
||||
if (idx >= input_depth || idy >= output_depth || idz1 >= kernel_size || idz2 >= kernel_size) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -560,23 +560,20 @@ __global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float***
|
||||
input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) );
|
||||
}
|
||||
|
||||
void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
|
||||
void backward_convolution_device(Kernel_cnn* kernel, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
|
||||
// Bias Kernel
|
||||
dim3 gridSize1(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_y));
|
||||
dim3 blockSize1(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||
|
||||
backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(ker, output, output_depth, output_width);
|
||||
backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(kernel, output, output_depth, output_width);
|
||||
|
||||
gpuErrchk( cudaPeekAtLastError() );
|
||||
gpuErrchk( cudaDeviceSynchronize() );
|
||||
|
||||
// Weights Kernel
|
||||
int k_size = input_width - output_width +1;
|
||||
|
||||
dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(k_size*k_size, BLOCKSIZE_y));
|
||||
dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(kernel_size*kernel_size, BLOCKSIZE_y));
|
||||
dim3 blockSize2(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||
|
||||
backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(ker, input, output, input_depth, output_depth, output_width, k_size);
|
||||
backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(kernel, input, output, input_depth, output_depth, output_width, kernel_size);
|
||||
|
||||
gpuErrchk( cudaPeekAtLastError() );
|
||||
gpuErrchk( cudaDeviceSynchronize() );
|
||||
@ -588,7 +585,7 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
|
||||
|
||||
funcPtr d_function = get_activation_function_cuda(activation);
|
||||
|
||||
backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(ker, input, input_z, output, input_depth, input_width, output_depth, k_size, d_function);
|
||||
backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(kernel, input, input_z, output, input_depth, input_width, output_depth, kernel_size, d_function);
|
||||
|
||||
gpuErrchk( cudaPeekAtLastError() );
|
||||
gpuErrchk( cudaDeviceSynchronize() );
|
||||
@ -597,9 +594,10 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
|
||||
#endif
|
||||
|
||||
|
||||
void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
|
||||
void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
|
||||
|
||||
funcPtr d_function = get_activation_function(activation);
|
||||
int max_move = kernel_size - padding;
|
||||
|
||||
// Bias
|
||||
for (int i=0; i < output_depth; i++) {
|
||||
@ -611,16 +609,16 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
|
||||
}
|
||||
|
||||
// Weights
|
||||
int k_size = input_width - output_width +1;
|
||||
|
||||
for (int h=0; h < input_depth; h++) {
|
||||
for (int i=0; i < output_depth; i++) {
|
||||
for (int j=0; j < k_size; j++) {
|
||||
for (int k=0; k < k_size; k++) {
|
||||
for (int j=-padding; j < max_move; j++) {
|
||||
for (int k=-padding; k < max_move; k++) {
|
||||
float tmp = 0;
|
||||
for (int l=0; l < output_width; l++) {
|
||||
for (int m=0; m < output_width; m++) {
|
||||
tmp += input[h][l+j][m+k]*output[i][l][m];
|
||||
if (not_outside(l*stride+j, m*stride+k, 0, input_width)) {
|
||||
tmp += input[h][l*stride+j][m*stride+k]*output[i][l][m];
|
||||
}
|
||||
}
|
||||
}
|
||||
ker->d_weights[h][i][j][k] += tmp;
|
||||
@ -629,26 +627,35 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
|
||||
}
|
||||
}
|
||||
|
||||
// Input
|
||||
// Input TODO
|
||||
if (is_first==1) // Pas besoin de backpropager dans l'input
|
||||
return;
|
||||
int min_m, max_m, min_n, max_n;
|
||||
for (int i=0; i < input_depth; i++) {
|
||||
for (int j=0; j < input_width; j++) {
|
||||
for (int k=0; k < input_width; k++) {
|
||||
float tmp = 0;
|
||||
for (int l=0; l < output_depth; l++) {
|
||||
min_m = max(0, k_size-1-j);
|
||||
max_m = min(k_size, input_width - j);
|
||||
min_n = max(0, k_size-1-k);
|
||||
max_n = min(k_size, input_width-k);
|
||||
for (int m=min_m; m < max_m; m++) {
|
||||
for (int n=min_n; n < max_n; n++) {
|
||||
tmp += output[l][j-k_size+m+1][k-k_size+n+1]*ker->weights[i][l][m][n];
|
||||
input[i][j][k] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
input[i][j][k] = tmp*d_function(input_z[i][j][k]);
|
||||
for (int h=0; h < input_depth; h++) {
|
||||
for (int i=0; i < output_depth; i++) {
|
||||
for (int j=-padding; j < max_move; j++) {
|
||||
for (int k=-padding; k < max_move; k++) {
|
||||
for (int l=0; l < output_width; l++) {
|
||||
for (int m=0; m < output_width; m++) {
|
||||
if (not_outside(l*stride+j, m*stride+k, 0, input_width)) {
|
||||
input[h][l*stride+j][m*stride+k] += output[i][l][m]*ker->weights[h][i][j+padding][k+padding];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i=0; i < input_depth; i++) {
|
||||
for (int j=0; j < input_width; j++) {
|
||||
for (int k=0; k < input_width; k++) {
|
||||
input[i][j][k] = input[i][j][k]*d_function(input_z[i][j][k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -657,10 +664,10 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
|
||||
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
|
||||
#ifndef __CUDACC__
|
||||
backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first);
|
||||
backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first, kernel_size, padding, stride);
|
||||
#else
|
||||
backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first);
|
||||
backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first, kernel_size, padding, stride);
|
||||
#endif
|
||||
}
|
@ -514,15 +514,15 @@ __global__ void backward_convolution_dbias_kernel(Kernel_cnn* ker, float*** outp
|
||||
ker->d_bias[idx][idy][idz] += output[idx][idy][idz];
|
||||
}
|
||||
|
||||
__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int k_size) {
|
||||
__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int kernel_size) {
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int idy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
int idz = threadIdx.z + blockDim.z*blockIdx.z;
|
||||
|
||||
int idz1 = idz / k_size;
|
||||
int idz2 = idz % k_size;
|
||||
int idz1 = idz / kernel_size;
|
||||
int idz2 = idz % kernel_size;
|
||||
|
||||
if (idx >= input_depth || idy >= output_depth || idz1 >= k_size || idz2 >= k_size) {
|
||||
if (idx >= input_depth || idy >= output_depth || idz1 >= kernel_size || idz2 >= kernel_size) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -560,23 +560,20 @@ __global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float***
|
||||
input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) );
|
||||
}
|
||||
|
||||
void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
|
||||
void backward_convolution_device(Kernel_cnn* kernel, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
|
||||
// Bias Kernel
|
||||
dim3 gridSize1(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_y));
|
||||
dim3 blockSize1(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||
|
||||
backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(ker, output, output_depth, output_width);
|
||||
backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(kernel, output, output_depth, output_width);
|
||||
|
||||
gpuErrchk( cudaPeekAtLastError() );
|
||||
gpuErrchk( cudaDeviceSynchronize() );
|
||||
|
||||
// Weights Kernel
|
||||
int k_size = input_width - output_width +1;
|
||||
|
||||
dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(k_size*k_size, BLOCKSIZE_y));
|
||||
dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(kernel_size*kernel_size, BLOCKSIZE_y));
|
||||
dim3 blockSize2(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||
|
||||
backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(ker, input, output, input_depth, output_depth, output_width, k_size);
|
||||
backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(kernel, input, output, input_depth, output_depth, output_width, kernel_size);
|
||||
|
||||
gpuErrchk( cudaPeekAtLastError() );
|
||||
gpuErrchk( cudaDeviceSynchronize() );
|
||||
@ -588,7 +585,7 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
|
||||
|
||||
funcPtr d_function = get_activation_function_cuda(activation);
|
||||
|
||||
backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(ker, input, input_z, output, input_depth, input_width, output_depth, k_size, d_function);
|
||||
backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(kernel, input, input_z, output, input_depth, input_width, output_depth, kernel_size, d_function);
|
||||
|
||||
gpuErrchk( cudaPeekAtLastError() );
|
||||
gpuErrchk( cudaDeviceSynchronize() );
|
||||
@ -597,9 +594,10 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
|
||||
#endif
|
||||
|
||||
|
||||
void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
|
||||
void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
|
||||
|
||||
funcPtr d_function = get_activation_function(activation);
|
||||
int max_move = kernel_size - padding;
|
||||
|
||||
// Bias
|
||||
for (int i=0; i < output_depth; i++) {
|
||||
@ -611,16 +609,16 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
|
||||
}
|
||||
|
||||
// Weights
|
||||
int k_size = input_width - output_width +1;
|
||||
|
||||
for (int h=0; h < input_depth; h++) {
|
||||
for (int i=0; i < output_depth; i++) {
|
||||
for (int j=0; j < k_size; j++) {
|
||||
for (int k=0; k < k_size; k++) {
|
||||
for (int j=-padding; j < max_move; j++) {
|
||||
for (int k=-padding; k < max_move; k++) {
|
||||
float tmp = 0;
|
||||
for (int l=0; l < output_width; l++) {
|
||||
for (int m=0; m < output_width; m++) {
|
||||
tmp += input[h][l+j][m+k]*output[i][l][m];
|
||||
if (not_outside(l*stride+j, m*stride+k, 0, input_width)) {
|
||||
tmp += input[h][l*stride+j][m*stride+k]*output[i][l][m];
|
||||
}
|
||||
}
|
||||
}
|
||||
ker->d_weights[h][i][j][k] += tmp;
|
||||
@ -629,26 +627,35 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
|
||||
}
|
||||
}
|
||||
|
||||
// Input
|
||||
// Input TODO
|
||||
if (is_first==1) // Pas besoin de backpropager dans l'input
|
||||
return;
|
||||
int min_m, max_m, min_n, max_n;
|
||||
for (int i=0; i < input_depth; i++) {
|
||||
for (int j=0; j < input_width; j++) {
|
||||
for (int k=0; k < input_width; k++) {
|
||||
float tmp = 0;
|
||||
for (int l=0; l < output_depth; l++) {
|
||||
min_m = max(0, k_size-1-j);
|
||||
max_m = min(k_size, input_width - j);
|
||||
min_n = max(0, k_size-1-k);
|
||||
max_n = min(k_size, input_width-k);
|
||||
for (int m=min_m; m < max_m; m++) {
|
||||
for (int n=min_n; n < max_n; n++) {
|
||||
tmp += output[l][j-k_size+m+1][k-k_size+n+1]*ker->weights[i][l][m][n];
|
||||
input[i][j][k] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
input[i][j][k] = tmp*d_function(input_z[i][j][k]);
|
||||
for (int h=0; h < input_depth; h++) {
|
||||
for (int i=0; i < output_depth; i++) {
|
||||
for (int j=-padding; j < max_move; j++) {
|
||||
for (int k=-padding; k < max_move; k++) {
|
||||
for (int l=0; l < output_width; l++) {
|
||||
for (int m=0; m < output_width; m++) {
|
||||
if (not_outside(l*stride+j, m*stride+k, 0, input_width)) {
|
||||
input[h][l*stride+j][m*stride+k] += output[i][l][m]*ker->weights[h][i][j+padding][k+padding];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i=0; i < input_depth; i++) {
|
||||
for (int j=0; j < input_width; j++) {
|
||||
for (int k=0; k < input_width; k++) {
|
||||
input[i][j][k] = input[i][j][k]*d_function(input_z[i][j][k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -657,10 +664,10 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
|
||||
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride) {
|
||||
#ifndef __CUDACC__
|
||||
backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first);
|
||||
backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first, kernel_size, padding, stride);
|
||||
#else
|
||||
backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first);
|
||||
backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first, kernel_size, padding, stride);
|
||||
#endif
|
||||
}
|
@ -263,7 +263,8 @@ void backward_propagation(Network* network, int wanted_number) {
|
||||
|
||||
|
||||
if (k_i->cnn) { // Convolution
|
||||
backward_convolution(k_i->cnn, input, input_z, output, input_depth, input_width, output_depth, output_width, -activation, is_last_layer);
|
||||
int kernel_size = k_i->cnn->k_size;
|
||||
backward_convolution(k_i->cnn, input, input_z, output, input_depth, input_width, output_depth, output_width, -activation, is_last_layer, kernel_size, padding, stride);
|
||||
} else if (k_i->nn) { // Full connection
|
||||
if (k_i->linearisation == DOESNT_LINEARISE) { // Vecteur -> Vecteur
|
||||
backward_dense(k_i->nn, input[0][0], input_z[0][0], output[0][0], input_width, output_width, -activation, is_last_layer);
|
||||
|
@ -68,6 +68,6 @@ extern "C"
|
||||
/*
|
||||
* Transfert les informations d'erreur à travers un couche de convolution
|
||||
*/
|
||||
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first);
|
||||
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first, int kernel_size, int padding, int stride);
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user