Redefine NOT_OUTSIDE with a macro

This commit is contained in:
augustin64 2023-06-03 16:04:29 +02:00
parent 38bcfb700e
commit 13c76cb002
9 changed files with 29 additions and 52 deletions

View File

@ -118,7 +118,7 @@ __global__ void backward_average_pooling_kernel(float*** input, float*** output,
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int idy_2 = stride*idy +a; int idy_2 = stride*idy +a;
int idz_2 = stride*idz +b; int idz_2 = stride*idz +b;
if (not_outside(idy_2, idz_2, 0, input_width)) { if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) {
int y = min(idy_2+1, min(kernel_size, input_width - idy_2)); int y = min(idy_2+1, min(kernel_size, input_width - idy_2));
int z = min(idz_2+1, min(kernel_size, input_width - idz_2)); int z = min(idz_2+1, min(kernel_size, input_width - idz_2));
input[idx][idy_2][idz_2] += output[idx][idy][idz]/(y*z); input[idx][idy_2][idz_2] += output[idx][idy][idz]/(y*z);
@ -155,7 +155,7 @@ void backward_average_pooling_cpu(float*** input, float*** output, int input_wid
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int j_2 = stride*j +a; int j_2 = stride*j +a;
int k_2 = stride*k + b; int k_2 = stride*k + b;
if (not_outside(j_2, k_2, 0, input_width)){ if (NOT_OUTSIDE(j_2, k_2, 0, input_width)){
int j_3 = min(j_2+1, min(kernel_size, input_width - j_2)); int j_3 = min(j_2+1, min(kernel_size, input_width - j_2));
int k_3 = min(k_2+1, min(kernel_size, input_width - k_2)); int k_3 = min(k_2+1, min(kernel_size, input_width - k_2));
input[i][j_2][k_2] += output[i][j][k]/(j_3*k_3); input[i][j_2][k_2] += output[i][j][k]/(j_3*k_3);
@ -202,7 +202,7 @@ __global__ void backward_max_pooling_kernel(float*** input, float*** output, int
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int idy_2 = stride*idy +a; int idy_2 = stride*idy +a;
int idz_2 = stride*idz +b; int idz_2 = stride*idz +b;
if (not_outside(idy_2, idz_2, 0, input_width)) { if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) {
if (input[idx][idy_2][idz_2] > m) { if (input[idx][idy_2][idz_2] > m) {
m = input[idx][idy_2][idz_2]; m = input[idx][idy_2][idz_2];
a_max = a; a_max = a;
@ -250,7 +250,7 @@ void backward_max_pooling_cpu(float*** input, float*** output, int input_width,
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int j_2 = stride*j +a; int j_2 = stride*j +a;
int k_2 = stride*k +b; int k_2 = stride*k +b;
if (not_outside(j_2, k_2, 0, input_width)) { if (NOT_OUTSIDE(j_2, k_2, 0, input_width)) {
if (input[i][j_2][k_2] > m) { if (input[i][j_2][k_2] > m) {
m = input[i][j_2][k_2]; m = input[i][j_2][k_2];
a_max = a; a_max = a;
@ -527,7 +527,7 @@ __global__ void backward_convolution_dweight_kernel(float**** d_weights, float**
for (int h=0; h < input_depth; h++) { for (int h=0; h < input_depth; h++) {
for (int j=-padding; j < max_move; j++) { for (int j=-padding; j < max_move; j++) {
for (int k=-padding; k < max_move; k++) { for (int k=-padding; k < max_move; k++) {
if (not_outside(idx*stride+j, idy*stride+k, 0, input_width)) { if (NOT_OUTSIDE(idx*stride+j, idy*stride+k, 0, input_width)) {
atomicAdd(&d_weights[h][idz][j+padding][k+padding], input[h][idx*stride+j][idy*stride+k]*output[idz][idx][idy]); atomicAdd(&d_weights[h][idz][j+padding][k+padding], input[h][idx*stride+j][idy*stride+k]*output[idz][idx][idy]);
} }
} }
@ -547,7 +547,7 @@ __global__ void backward_convolution_propagate_kernel(float**** weights, float**
for (int k=-padding; k < max_move; k++) { for (int k=-padding; k < max_move; k++) {
for (int l=0; l < output_width; l++) { for (int l=0; l < output_width; l++) {
for (int m=0; m < output_width; m++) { for (int m=0; m < output_width; m++) {
if (not_outside(l*stride+j, m*stride+k, 0, input_width)) { if (NOT_OUTSIDE(l*stride+j, m*stride+k, 0, input_width)) {
atomicAdd(&input[idx][l*stride+j][m*stride+k], output[idy][l][m]*weights[idx][idy][j+padding][k+padding]); atomicAdd(&input[idx][l*stride+j][m*stride+k], output[idy][l][m]*weights[idx][idy][j+padding][k+padding]);
} }
} }
@ -632,7 +632,7 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
float tmp = 0; float tmp = 0;
for (int l=0; l < output_width; l++) { for (int l=0; l < output_width; l++) {
for (int m=0; m < output_width; m++) { for (int m=0; m < output_width; m++) {
if (not_outside(l*stride+j, m*stride+k, 0, input_width)) { if (NOT_OUTSIDE(l*stride+j, m*stride+k, 0, input_width)) {
tmp += input[h][l*stride+j][m*stride+k]*output[i][l][m]; tmp += input[h][l*stride+j][m*stride+k]*output[i][l][m];
} }
} }
@ -659,7 +659,7 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
for (int k=-padding; k < max_move; k++) { for (int k=-padding; k < max_move; k++) {
for (int l=0; l < output_width; l++) { for (int l=0; l < output_width; l++) {
for (int m=0; m < output_width; m++) { for (int m=0; m < output_width; m++) {
if (not_outside(l*stride+j, m*stride+k, 0, input_width)) { if (NOT_OUTSIDE(l*stride+j, m*stride+k, 0, input_width)) {
input[h][l*stride+j][m*stride+k] += output[i][l][m]*ker->weights[h][i][j+padding][k+padding]; input[h][l*stride+j][m*stride+k] += output[i][l][m]*ker->weights[h][i][j+padding][k+padding];
} }
} }

View File

@ -118,7 +118,7 @@ __global__ void backward_average_pooling_kernel(float*** input, float*** output,
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int idy_2 = stride*idy +a; int idy_2 = stride*idy +a;
int idz_2 = stride*idz +b; int idz_2 = stride*idz +b;
if (not_outside(idy_2, idz_2, 0, input_width)) { if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) {
int y = min(idy_2+1, min(kernel_size, input_width - idy_2)); int y = min(idy_2+1, min(kernel_size, input_width - idy_2));
int z = min(idz_2+1, min(kernel_size, input_width - idz_2)); int z = min(idz_2+1, min(kernel_size, input_width - idz_2));
input[idx][idy_2][idz_2] += output[idx][idy][idz]/(y*z); input[idx][idy_2][idz_2] += output[idx][idy][idz]/(y*z);
@ -155,7 +155,7 @@ void backward_average_pooling_cpu(float*** input, float*** output, int input_wid
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int j_2 = stride*j +a; int j_2 = stride*j +a;
int k_2 = stride*k + b; int k_2 = stride*k + b;
if (not_outside(j_2, k_2, 0, input_width)){ if (NOT_OUTSIDE(j_2, k_2, 0, input_width)){
int j_3 = min(j_2+1, min(kernel_size, input_width - j_2)); int j_3 = min(j_2+1, min(kernel_size, input_width - j_2));
int k_3 = min(k_2+1, min(kernel_size, input_width - k_2)); int k_3 = min(k_2+1, min(kernel_size, input_width - k_2));
input[i][j_2][k_2] += output[i][j][k]/(j_3*k_3); input[i][j_2][k_2] += output[i][j][k]/(j_3*k_3);
@ -202,7 +202,7 @@ __global__ void backward_max_pooling_kernel(float*** input, float*** output, int
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int idy_2 = stride*idy +a; int idy_2 = stride*idy +a;
int idz_2 = stride*idz +b; int idz_2 = stride*idz +b;
if (not_outside(idy_2, idz_2, 0, input_width)) { if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) {
if (input[idx][idy_2][idz_2] > m) { if (input[idx][idy_2][idz_2] > m) {
m = input[idx][idy_2][idz_2]; m = input[idx][idy_2][idz_2];
a_max = a; a_max = a;
@ -250,7 +250,7 @@ void backward_max_pooling_cpu(float*** input, float*** output, int input_width,
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int j_2 = stride*j +a; int j_2 = stride*j +a;
int k_2 = stride*k +b; int k_2 = stride*k +b;
if (not_outside(j_2, k_2, 0, input_width)) { if (NOT_OUTSIDE(j_2, k_2, 0, input_width)) {
if (input[i][j_2][k_2] > m) { if (input[i][j_2][k_2] > m) {
m = input[i][j_2][k_2]; m = input[i][j_2][k_2];
a_max = a; a_max = a;
@ -527,7 +527,7 @@ __global__ void backward_convolution_dweight_kernel(float**** d_weights, float**
for (int h=0; h < input_depth; h++) { for (int h=0; h < input_depth; h++) {
for (int j=-padding; j < max_move; j++) { for (int j=-padding; j < max_move; j++) {
for (int k=-padding; k < max_move; k++) { for (int k=-padding; k < max_move; k++) {
if (not_outside(idx*stride+j, idy*stride+k, 0, input_width)) { if (NOT_OUTSIDE(idx*stride+j, idy*stride+k, 0, input_width)) {
atomicAdd(&d_weights[h][idz][j+padding][k+padding], input[h][idx*stride+j][idy*stride+k]*output[idz][idx][idy]); atomicAdd(&d_weights[h][idz][j+padding][k+padding], input[h][idx*stride+j][idy*stride+k]*output[idz][idx][idy]);
} }
} }
@ -547,7 +547,7 @@ __global__ void backward_convolution_propagate_kernel(float**** weights, float**
for (int k=-padding; k < max_move; k++) { for (int k=-padding; k < max_move; k++) {
for (int l=0; l < output_width; l++) { for (int l=0; l < output_width; l++) {
for (int m=0; m < output_width; m++) { for (int m=0; m < output_width; m++) {
if (not_outside(l*stride+j, m*stride+k, 0, input_width)) { if (NOT_OUTSIDE(l*stride+j, m*stride+k, 0, input_width)) {
atomicAdd(&input[idx][l*stride+j][m*stride+k], output[idy][l][m]*weights[idx][idy][j+padding][k+padding]); atomicAdd(&input[idx][l*stride+j][m*stride+k], output[idy][l][m]*weights[idx][idy][j+padding][k+padding]);
} }
} }
@ -632,7 +632,7 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
float tmp = 0; float tmp = 0;
for (int l=0; l < output_width; l++) { for (int l=0; l < output_width; l++) {
for (int m=0; m < output_width; m++) { for (int m=0; m < output_width; m++) {
if (not_outside(l*stride+j, m*stride+k, 0, input_width)) { if (NOT_OUTSIDE(l*stride+j, m*stride+k, 0, input_width)) {
tmp += input[h][l*stride+j][m*stride+k]*output[i][l][m]; tmp += input[h][l*stride+j][m*stride+k]*output[i][l][m];
} }
} }
@ -659,7 +659,7 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
for (int k=-padding; k < max_move; k++) { for (int k=-padding; k < max_move; k++) {
for (int l=0; l < output_width; l++) { for (int l=0; l < output_width; l++) {
for (int m=0; m < output_width; m++) { for (int m=0; m < output_width; m++) {
if (not_outside(l*stride+j, m*stride+k, 0, input_width)) { if (NOT_OUTSIDE(l*stride+j, m*stride+k, 0, input_width)) {
input[h][l*stride+j][m*stride+k] += output[i][l][m]*ker->weights[h][i][j+padding][k+padding]; input[h][l*stride+j][m*stride+k] += output[i][l][m]*ker->weights[h][i][j+padding][k+padding];
} }
} }

View File

@ -28,7 +28,7 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
for (int c=-padding; c < max_move; c++) { // colonne du filtre for (int c=-padding; c < max_move; c++) { // colonne du filtre
int x = (stride*j+b); int x = (stride*j+b);
int y = (stride*k+c); int y = (stride*k+c);
if (not_outside(x, y, 0, input_width)) { if (NOT_OUTSIDE(x, y, 0, input_width)) {
f += kernel->weights[a][i][b+padding][c+padding]*input[a][x][y]; f += kernel->weights[a][i][b+padding][c+padding]*input[a][x][y];
} }
} }
@ -61,7 +61,7 @@ __global__ void make_convolution_kernel(float**** weights, float*** bias, int k_
for (int c=-padding; c < max_move; c++) { for (int c=-padding; c < max_move; c++) {
int idy_2 = idy*stride+b; int idy_2 = idy*stride+b;
int idz_2 = idz*stride+c; int idz_2 = idz*stride+c;
if (not_outside(idy_2, idz_2, 0, input_width)) { if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) {
f += weights[a][idx][b+padding][c+padding]*input[a][idy_2][idz_2]; f += weights[a][idx][b+padding][c+padding]*input[a][idy_2][idz_2];
} }
} }

View File

@ -28,7 +28,7 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
for (int c=-padding; c < max_move; c++) { // colonne du filtre for (int c=-padding; c < max_move; c++) { // colonne du filtre
int x = (stride*j+b); int x = (stride*j+b);
int y = (stride*k+c); int y = (stride*k+c);
if (not_outside(x, y, 0, input_width)) { if (NOT_OUTSIDE(x, y, 0, input_width)) {
f += kernel->weights[a][i][b+padding][c+padding]*input[a][x][y]; f += kernel->weights[a][i][b+padding][c+padding]*input[a][x][y];
} }
} }
@ -61,7 +61,7 @@ __global__ void make_convolution_kernel(float**** weights, float*** bias, int k_
for (int c=-padding; c < max_move; c++) { for (int c=-padding; c < max_move; c++) {
int idy_2 = idy*stride+b; int idy_2 = idy*stride+b;
int idz_2 = idz*stride+c; int idz_2 = idz*stride+c;
if (not_outside(idy_2, idz_2, 0, input_width)) { if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) {
f += weights[a][idx][b+padding][c+padding]*input[a][idy_2][idz_2]; f += weights[a][idx][b+padding][c+padding]*input[a][idy_2][idz_2];
} }
} }

View File

@ -34,7 +34,7 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int idy_2 = stride*idy +a; int idy_2 = stride*idy +a;
int idz_2 = stride*idz +b; int idz_2 = stride*idz +b;
if (not_outside(idy_2, idz_2, 0, input_width)) { if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) {
sum += input[idx][idy_2][idz_2]; sum += input[idx][idy_2][idz_2];
nb_elements++; nb_elements++;
} }
@ -69,7 +69,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int j_2 = stride*j +a; int j_2 = stride*j +a;
int k_2 = stride*k +b; int k_2 = stride*k +b;
if (not_outside(j_2, k_2, 0, input_width)) { if (NOT_OUTSIDE(j_2, k_2, 0, input_width)) {
sum += input[i][j_2][k_2]; sum += input[i][j_2][k_2];
nb_elements++; nb_elements++;
} }
@ -119,7 +119,7 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int idy_2 = stride*idy +a; int idy_2 = stride*idy +a;
int idz_2 = stride*idz +b; int idz_2 = stride*idz +b;
if (not_outside(idy_2, idz_2, 0, input_width)) { if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) {
temp = input[idx][idy_2][idz_2]; temp = input[idx][idy_2][idz_2];
m = m > temp ? m : temp; // max(m, temp) m = m > temp ? m : temp; // max(m, temp)
} }
@ -153,7 +153,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int j_2 = stride*j +a; int j_2 = stride*j +a;
int k_2 = stride*k +b; int k_2 = stride*k +b;
if (not_outside(j_2, k_2, 0, input_width)) { if (NOT_OUTSIDE(j_2, k_2, 0, input_width)) {
m = fmaxf(m, input[i][j_2][k_2]); m = fmaxf(m, input[i][j_2][k_2]);
} }
} }

View File

@ -34,7 +34,7 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int idy_2 = stride*idy +a; int idy_2 = stride*idy +a;
int idz_2 = stride*idz +b; int idz_2 = stride*idz +b;
if (not_outside(idy_2, idz_2, 0, input_width)) { if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) {
sum += input[idx][idy_2][idz_2]; sum += input[idx][idy_2][idz_2];
nb_elements++; nb_elements++;
} }
@ -69,7 +69,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int j_2 = stride*j +a; int j_2 = stride*j +a;
int k_2 = stride*k +b; int k_2 = stride*k +b;
if (not_outside(j_2, k_2, 0, input_width)) { if (NOT_OUTSIDE(j_2, k_2, 0, input_width)) {
sum += input[i][j_2][k_2]; sum += input[i][j_2][k_2];
nb_elements++; nb_elements++;
} }
@ -119,7 +119,7 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int idy_2 = stride*idy +a; int idy_2 = stride*idy +a;
int idz_2 = stride*idz +b; int idz_2 = stride*idz +b;
if (not_outside(idy_2, idz_2, 0, input_width)) { if (NOT_OUTSIDE(idy_2, idz_2, 0, input_width)) {
temp = input[idx][idy_2][idz_2]; temp = input[idx][idy_2][idz_2];
m = m > temp ? m : temp; // max(m, temp) m = m > temp ? m : temp; // max(m, temp)
} }
@ -153,7 +153,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int j_2 = stride*j +a; int j_2 = stride*j +a;
int k_2 = stride*k +b; int k_2 = stride*k +b;
if (not_outside(j_2, k_2, 0, input_width)) { if (NOT_OUTSIDE(j_2, k_2, 0, input_width)) {
m = fmaxf(m, input[i][j_2][k_2]); m = fmaxf(m, input[i][j_2][k_2]);
} }
} }

View File

@ -26,6 +26,7 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t
} }
#endif #endif
/*
 * NOT_OUTSIDE(x, y, lower_bound, upper_bound) evaluates to true if and only if
 *     lower_bound <= x < upper_bound
 *     lower_bound <= y < upper_bound
 *
 * Every parameter use and the whole expansion are parenthesized so that
 * arguments containing lower-precedence operators (e.g. `c ? a : b`) and
 * surrounding expressions cannot re-associate with the comparisons.
 * This is a function-like macro: arguments may be evaluated more than once,
 * so do not pass expressions with side effects (i++, function calls that
 * mutate state, ...).
 */
#define NOT_OUTSIDE(x, y, lower_bound, upper_bound) \
    (!((x) < (lower_bound) || (y) < (lower_bound) || (x) >= (upper_bound) || (y) >= (upper_bound)))
#ifndef __CUDACC__ #ifndef __CUDACC__
/* /*
@ -40,16 +41,6 @@ int max(int a, int b);
#endif #endif
#ifdef __CUDACC__
__host__ __device__
#endif
/*
* Returns true if and only if both of the following hold:
* lower_bound <= x < upper_bound
* lower_bound <= y < upper_bound
* (When __CUDACC__ is defined, the function is declared __host__ __device__.)
*/
bool not_outside(int x, int y, int lower_bound, int upper_bound);
/* /*
* Partie entière supérieure de a/b * Partie entière supérieure de a/b
*/ */

View File

@ -29,13 +29,6 @@ int max(int a, int b) {
#endif #endif
#ifdef __CUDACC__
__host__ __device__
#endif
/*
 * Returns true exactly when both x and y lie in the half-open range
 * [lower_bound, upper_bound); returns false as soon as either coordinate
 * falls below lower_bound or reaches upper_bound.
 */
bool not_outside(int x, int y, int lower_bound, int upper_bound) {
    const bool above_lower = (x >= lower_bound) && (y >= lower_bound);
    const bool below_upper = (x < upper_bound) && (y < upper_bound);
    return above_lower && below_upper;
}
int i_div_up(int a, int b) { // Partie entière supérieure de a/b int i_div_up(int a, int b) { // Partie entière supérieure de a/b
return ((a % b) != 0) ? (a / b + 1) : (a / b); return ((a % b) != 0) ? (a / b + 1) : (a / b);
} }

View File

@ -29,13 +29,6 @@ int max(int a, int b) {
#endif #endif
#ifdef __CUDACC__
__host__ __device__
#endif
/*
 * Returns true exactly when both x and y lie in the half-open range
 * [lower_bound, upper_bound); returns false as soon as either coordinate
 * falls below lower_bound or reaches upper_bound.
 */
bool not_outside(int x, int y, int lower_bound, int upper_bound) {
    const bool above_lower = (x >= lower_bound) && (y >= lower_bound);
    const bool below_upper = (x < upper_bound) && (y < upper_bound);
    return above_lower && below_upper;
}
int i_div_up(int a, int b) { // Partie entière supérieure de a/b int i_div_up(int a, int b) { // Partie entière supérieure de a/b
return ((a % b) != 0) ? (a / b + 1) : (a / b); return ((a % b) != 0) ? (a / b + 1) : (a / b);
} }