From 13c76cb0026b794fbaa55a8467a76a1f1fcb5430 Mon Sep 17 00:00:00 2001 From: augustin64 Date: Sat, 3 Jun 2023 16:04:29 +0200 Subject: [PATCH] Redefine NOT_OUTSIDE with a macro --- src/cnn/backpropagation.c | 16 ++++++++-------- src/cnn/backpropagation.cu | 16 ++++++++-------- src/cnn/convolution.c | 4 ++-- src/cnn/convolution.cu | 4 ++-- src/cnn/make.c | 8 ++++---- src/cnn/make.cu | 8 ++++---- src/common/include/utils.h | 11 +---------- src/common/utils.c | 7 ------- src/common/utils.cu | 7 ------- 9 files changed, 29 insertions(+), 52 deletions(-) diff --git a/src/cnn/backpropagation.c b/src/cnn/backpropagation.c index 8b12822..576437e 100644 --- a/src/cnn/backpropagation.c +++ b/src/cnn/backpropagation.c @@ -118,7 +118,7 @@ __global__ void backward_average_pooling_kernel(float*** input, float*** output, for (int b=-padding; b < max_move; b++) { int idy_2 = stride*idy +a; int idz_2 = stride*idz +b; - if (not_outside(idy_2, idz_2, 0, input_width)) { + if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) { int y = min(idy_2+1, min(kernel_size, input_width - idy_2)); int z = min(idz_2+1, min(kernel_size, input_width - idz_2)); input[idx][idy_2][idz_2] += output[idx][idy][idz]/(y*z); @@ -155,7 +155,7 @@ void backward_average_pooling_cpu(float*** input, float*** output, int input_wid for (int b=-padding; b < max_move; b++) { int j_2 = stride*j +a; int k_2 = stride*k + b; - if (not_outside(j_2, k_2, 0, input_width)){ + if (NOT_OUTSIDE(j_2, k_2, 0, input_width)){ int j_3 = min(j_2+1, min(kernel_size, input_width - j_2)); int k_3 = min(k_2+1, min(kernel_size, input_width - k_2)); input[i][j_2][k_2] += output[i][j][k]/(j_3*k_3); @@ -202,7 +202,7 @@ __global__ void backward_max_pooling_kernel(float*** input, float*** output, int for (int b=-padding; b < max_move; b++) { int idy_2 = stride*idy +a; int idz_2 = stride*idz +b; - if (not_outside(idy_2, idz_2, 0, input_width)) { + if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) { if (input[idx][idy_2][idz_2] > m) { m = input[idx][idy_2][idz_2]; a_max = a; @@ -250,7 +250,7 @@ void backward_max_pooling_cpu(float*** input, float*** output, int input_width, for (int b=-padding; b < max_move; b++) { int j_2 = stride*j +a; int k_2 = stride*k +b; - if (not_outside(j_2, k_2, 0, input_width)) { + if (NOT_OUTSIDE(j_2, k_2, 0, input_width)) { if (input[i][j_2][k_2] > m) { m = input[i][j_2][k_2]; a_max = a; @@ -527,7 +527,7 @@ __global__ void backward_convolution_dweight_kernel(float**** d_weights, float** for (int h=0; h < input_depth; h++) { for (int j=-padding; j < max_move; j++) { for (int k=-padding; k < max_move; k++) { - if (not_outside(idx*stride+j, idy*stride+k, 0, input_width)) { + if (NOT_OUTSIDE(idx*stride+j, idy*stride+k, 0, input_width)) { atomicAdd(&d_weights[h][idz][j+padding][k+padding], input[h][idx*stride+j][idy*stride+k]*output[idz][idx][idy]); } } @@ -547,7 +547,7 @@ __global__ void backward_convolution_propagate_kernel(float**** weights, float** for (int k=-padding; k < max_move; k++) { for (int l=0; l < output_width; l++) { for (int m=0; m < output_width; m++) { - if (not_outside(l*stride+j, m*stride+k, 0, input_width)) { + if (NOT_OUTSIDE(l*stride+j, m*stride+k, 0, input_width)) { atomicAdd(&input[idx][l*stride+j][m*stride+k], output[idy][l][m]*weights[idx][idy][j+padding][k+padding]); } } @@ -632,7 +632,7 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float tmp = 0; for (int l=0; l < output_width; l++) { for (int m=0; m < output_width; m++) { - if (not_outside(l*stride+j, m*stride+k, 0, input_width)) { + if (NOT_OUTSIDE(l*stride+j, m*stride+k, 0, input_width)) { tmp += input[h][l*stride+j][m*stride+k]*output[i][l][m]; } } @@ -659,7 +659,7 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, for (int k=-padding; k < max_move; k++) { for (int l=0; l < output_width; l++) { for (int m=0; m < output_width; m++) { - if (not_outside(l*stride+j, m*stride+k, 0, input_width)) { + if (NOT_OUTSIDE(l*stride+j, m*stride+k, 0, input_width)) { input[h][l*stride+j][m*stride+k] += output[i][l][m]*ker->weights[h][i][j+padding][k+padding]; } } diff --git a/src/cnn/backpropagation.cu b/src/cnn/backpropagation.cu index 1d23a2b..6c2f34f 100644 --- a/src/cnn/backpropagation.cu +++ b/src/cnn/backpropagation.cu @@ -118,7 +118,7 @@ __global__ void backward_average_pooling_kernel(float*** input, float*** output, for (int b=-padding; b < max_move; b++) { int idy_2 = stride*idy +a; int idz_2 = stride*idz +b; - if (not_outside(idy_2, idz_2, 0, input_width)) { + if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) { int y = min(idy_2+1, min(kernel_size, input_width - idy_2)); int z = min(idz_2+1, min(kernel_size, input_width - idz_2)); input[idx][idy_2][idz_2] += output[idx][idy][idz]/(y*z); @@ -155,7 +155,7 @@ void backward_average_pooling_cpu(float*** input, float*** output, int input_wid for (int b=-padding; b < max_move; b++) { int j_2 = stride*j +a; int k_2 = stride*k + b; - if (not_outside(j_2, k_2, 0, input_width)){ + if (NOT_OUTSIDE(j_2, k_2, 0, input_width)){ int j_3 = min(j_2+1, min(kernel_size, input_width - j_2)); int k_3 = min(k_2+1, min(kernel_size, input_width - k_2)); input[i][j_2][k_2] += output[i][j][k]/(j_3*k_3); @@ -202,7 +202,7 @@ __global__ void backward_max_pooling_kernel(float*** input, float*** output, int for (int b=-padding; b < max_move; b++) { int idy_2 = stride*idy +a; int idz_2 = stride*idz +b; - if (not_outside(idy_2, idz_2, 0, input_width)) { + if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) { if (input[idx][idy_2][idz_2] > m) { m = input[idx][idy_2][idz_2]; a_max = a; @@ -250,7 +250,7 @@ void backward_max_pooling_cpu(float*** input, float*** output, int input_width, for (int b=-padding; b < max_move; b++) { int j_2 = stride*j +a; int k_2 = stride*k +b; - if (not_outside(j_2, k_2, 0, input_width)) { + if (NOT_OUTSIDE(j_2, k_2, 0, input_width)) { if (input[i][j_2][k_2] > m) { m = input[i][j_2][k_2]; a_max = a; @@ -527,7 +527,7 @@ __global__ void backward_convolution_dweight_kernel(float**** d_weights, float** for (int h=0; h < input_depth; h++) { for (int j=-padding; j < max_move; j++) { for (int k=-padding; k < max_move; k++) { - if (not_outside(idx*stride+j, idy*stride+k, 0, input_width)) { + if (NOT_OUTSIDE(idx*stride+j, idy*stride+k, 0, input_width)) { atomicAdd(&d_weights[h][idz][j+padding][k+padding], input[h][idx*stride+j][idy*stride+k]*output[idz][idx][idy]); } } @@ -547,7 +547,7 @@ __global__ void backward_convolution_propagate_kernel(float**** weights, float** for (int k=-padding; k < max_move; k++) { for (int l=0; l < output_width; l++) { for (int m=0; m < output_width; m++) { - if (not_outside(l*stride+j, m*stride+k, 0, input_width)) { + if (NOT_OUTSIDE(l*stride+j, m*stride+k, 0, input_width)) { atomicAdd(&input[idx][l*stride+j][m*stride+k], output[idy][l][m]*weights[idx][idy][j+padding][k+padding]); } } @@ -632,7 +632,7 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float tmp = 0; for (int l=0; l < output_width; l++) { for (int m=0; m < output_width; m++) { - if (not_outside(l*stride+j, m*stride+k, 0, input_width)) { + if (NOT_OUTSIDE(l*stride+j, m*stride+k, 0, input_width)) { tmp += input[h][l*stride+j][m*stride+k]*output[i][l][m]; } } @@ -659,7 +659,7 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, for (int k=-padding; k < max_move; k++) { for (int l=0; l < output_width; l++) { for (int m=0; m < output_width; m++) { - if (not_outside(l*stride+j, m*stride+k, 0, input_width)) { + if (NOT_OUTSIDE(l*stride+j, m*stride+k, 0, input_width)) { input[h][l*stride+j][m*stride+k] += output[i][l][m]*ker->weights[h][i][j+padding][k+padding]; } } diff --git a/src/cnn/convolution.c b/src/cnn/convolution.c index 715e099..b9eaede 100644 --- a/src/cnn/convolution.c +++ b/src/cnn/convolution.c @@ -28,7 +28,7 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i for (int c=-padding; c < max_move; c++) { // colonne du filtre int x = (stride*j+b); int y = (stride*k+c); - if (not_outside(x, y, 0, input_width)) { + if (NOT_OUTSIDE(x, y, 0, input_width)) { f += kernel->weights[a][i][b+padding][c+padding]*input[a][x][y]; } } @@ -61,7 +61,7 @@ __global__ void make_convolution_kernel(float**** weights, float*** bias, int k_ for (int c=-padding; c < max_move; c++) { int idy_2 = idy*stride+b; int idz_2 = idz*stride+c; - if (not_outside(idy_2, idz_2, 0, input_width)) { + if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) { f += weights[a][idx][b+padding][c+padding]*input[a][idy_2][idz_2]; } } diff --git a/src/cnn/convolution.cu b/src/cnn/convolution.cu index 715e099..b9eaede 100644 --- a/src/cnn/convolution.cu +++ b/src/cnn/convolution.cu @@ -28,7 +28,7 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i for (int c=-padding; c < max_move; c++) { // colonne du filtre int x = (stride*j+b); int y = (stride*k+c); - if (not_outside(x, y, 0, input_width)) { + if (NOT_OUTSIDE(x, y, 0, input_width)) { f += kernel->weights[a][i][b+padding][c+padding]*input[a][x][y]; } } @@ -61,7 +61,7 @@ __global__ void make_convolution_kernel(float**** weights, float*** bias, int k_ for (int c=-padding; c < max_move; c++) { int idy_2 = idy*stride+b; int idz_2 = idz*stride+c; - if (not_outside(idy_2, idz_2, 0, input_width)) { + if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) { f += weights[a][idx][b+padding][c+padding]*input[a][idy_2][idz_2]; } } diff --git a/src/cnn/make.c b/src/cnn/make.c index 96102bc..47614c2 100644 --- a/src/cnn/make.c +++ b/src/cnn/make.c @@ -34,7 +34,7 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int for (int b=-padding; b < max_move; b++) { int idy_2 = stride*idy +a; int idz_2 = stride*idz +b; - if (not_outside(idy_2, idz_2, 0, input_width)) { + if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) { sum += input[idx][idy_2][idz_2]; nb_elements++; } @@ -69,7 +69,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out for (int b=-padding; b < max_move; b++) { int j_2 = stride*j +a; int k_2 = stride*k +b; - if (not_outside(j_2, k_2, 0, input_width)) { + if (NOT_OUTSIDE(j_2, k_2, 0, input_width)) { sum += input[i][j_2][k_2]; nb_elements++; } @@ -119,7 +119,7 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz for (int b=-padding; b < max_move; b++) { int idy_2 = stride*idy +a; int idz_2 = stride*idz +b; - if (not_outside(idy_2, idz_2, 0, input_width)) { + if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) { temp = input[idx][idy_2][idz_2]; m = m > temp ? m : temp; // max(m, temp) } @@ -153,7 +153,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_ for (int b=-padding; b < max_move; b++) { int j_2 = stride*j +a; int k_2 = stride*k +b; - if (not_outside(j_2, k_2, 0, input_width)) { + if (NOT_OUTSIDE(j_2, k_2, 0, input_width)) { m = fmaxf(m, input[i][j_2][k_2]); } } diff --git a/src/cnn/make.cu b/src/cnn/make.cu index 96102bc..47614c2 100644 --- a/src/cnn/make.cu +++ b/src/cnn/make.cu @@ -34,7 +34,7 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int for (int b=-padding; b < max_move; b++) { int idy_2 = stride*idy +a; int idz_2 = stride*idz +b; - if (not_outside(idy_2, idz_2, 0, input_width)) { + if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) { sum += input[idx][idy_2][idz_2]; nb_elements++; } @@ -69,7 +69,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out for (int b=-padding; b < max_move; b++) { int j_2 = stride*j +a; int k_2 = stride*k +b; - if (not_outside(j_2, k_2, 0, input_width)) { + if (NOT_OUTSIDE(j_2, k_2, 0, input_width)) { sum += input[i][j_2][k_2]; nb_elements++; } @@ -119,7 +119,7 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz for (int b=-padding; b < max_move; b++) { int idy_2 = stride*idy +a; int idz_2 = stride*idz +b; - if (not_outside(idy_2, idz_2, 0, input_width)) { + if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) { temp = input[idx][idy_2][idz_2]; m = m > temp ? m : temp; // max(m, temp) } @@ -153,7 +153,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_ for (int b=-padding; b < max_move; b++) { int j_2 = stride*j +a; int k_2 = stride*k +b; - if (not_outside(j_2, k_2, 0, input_width)) { + if (NOT_OUTSIDE(j_2, k_2, 0, input_width)) { m = fmaxf(m, input[i][j_2][k_2]); } } diff --git a/src/common/include/utils.h b/src/common/include/utils.h index 8c66828..39a48e8 100644 --- a/src/common/include/utils.h +++ b/src/common/include/utils.h @@ -26,6 +26,7 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t } #endif +#define NOT_OUTSIDE(x, y, lower_bound, upper_bound) !(x < lower_bound || y < lower_bound || x >= upper_bound || y>= upper_bound) #ifndef __CUDACC__ /* @@ -40,16 +41,6 @@ int max(int a, int b); #endif -#ifdef __CUDACC__ -__host__ __device__ -#endif -/* -* On renvoie true si et seulement si _ et _: -* lower_bound <= x < upper_bound -* lower_bound <= y < upper_bound -*/ -bool not_outside(int x, int y, int lower_bound, int upper_bound); - /* * Partie entière supérieure de a/b */ diff --git a/src/common/utils.c b/src/common/utils.c index 5292dc3..9765e19 100644 --- a/src/common/utils.c +++ b/src/common/utils.c @@ -29,13 +29,6 @@ int max(int a, int b) { #endif -#ifdef __CUDACC__ -__host__ __device__ -#endif -bool not_outside(int x, int y, int lower_bound, int upper_bound) { - return !(x < lower_bound || y < lower_bound || x >= upper_bound || y>= upper_bound); -} - int i_div_up(int a, int b) { // Partie entière supérieure de a/b return ((a % b) != 0) ? (a / b + 1) : (a / b); } diff --git a/src/common/utils.cu b/src/common/utils.cu index 5292dc3..9765e19 100644 --- a/src/common/utils.cu +++ b/src/common/utils.cu @@ -29,13 +29,6 @@ int max(int a, int b) { #endif -#ifdef __CUDACC__ -__host__ __device__ -#endif -bool not_outside(int x, int y, int lower_bound, int upper_bound) { - return !(x < lower_bound || y < lower_bound || x >= upper_bound || y>= upper_bound); -} - int i_div_up(int a, int b) { // Partie entière supérieure de a/b return ((a % b) != 0) ? (a / b + 1) : (a / b); }