Ajout du stride dans average_pooling et max_pooling

This commit is contained in:
julienChemillier 2023-05-08 11:32:58 +02:00
parent 6e022fbd44
commit af288166d6
4 changed files with 40 additions and 40 deletions

View File

@ -208,9 +208,9 @@ void forward_propagation(Network* network) {
return; return;
} else { // Pooling sur une matrice } else { // Pooling sur une matrice
if (pooling == AVG_POOLING) { if (pooling == AVG_POOLING) {
make_average_pooling(input, output, input_width/output_width, output_depth, output_width); make_average_pooling(input, output, input_width/output_width, output_depth, output_width, input_width/output_width);
} else if (pooling == MAX_POOLING) { } else if (pooling == MAX_POOLING) {
make_max_pooling(input, output, input_width/output_width, output_depth, output_width); make_max_pooling(input, output, input_width/output_width, output_depth, output_width, input_width/output_width);
} else { } else {
printf_error("Impossible de reconnaître le type de couche de pooling: "); printf_error("Impossible de reconnaître le type de couche de pooling: ");
printf("identifiant: %d, position: %d\n", pooling, i); printf("identifiant: %d, position: %d\n", pooling, i);

View File

@ -19,7 +19,7 @@ extern "C"
/* /*
* Effectue un average pooling avec stride=size * Effectue un average pooling avec stride=size
*/ */
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_dim); void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride);
#ifdef __CUDACC__ #ifdef __CUDACC__
extern "C" extern "C"
@ -27,7 +27,7 @@ extern "C"
/* /*
* Effectue un max pooling avec stride=size * Effectue un max pooling avec stride=size
*/ */
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_dim); void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride);
#ifdef __CUDACC__ #ifdef __CUDACC__
extern "C" extern "C"

View File

@ -15,7 +15,7 @@
* Average Pooling * Average Pooling
*/ */
#ifdef __CUDACC__ #ifdef __CUDACC__
__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width) { __global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
@ -30,24 +30,24 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int
for (int a=0; a < size; a++) { for (int a=0; a < size; a++) {
for (int b=0; b < size; b++) { for (int b=0; b < size; b++) {
sum += input[idx][size*idy +a][size*idz +b]; sum += input[idx][stride*idy +a][stride*idz +b];
} }
} }
output[idx][idy][idz] = sum/(float)n; output[idx][idy][idz] = sum/(float)n;
} }
void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width) { void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
// Make computation // Make computation
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z)); dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width); make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride);
gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() ); gpuErrchk( cudaDeviceSynchronize() );
} }
#endif #endif
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width) { void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
// input[output_depth][output_width+size-1][output_width+size-1] // input[output_depth][output_width+size-1][output_width+size-1]
// output[output_depth][output_width][output_width] // output[output_depth][output_width][output_width]
float sum; float sum;
@ -59,7 +59,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
sum = 0; sum = 0;
for (int a=0; a < size; a++) { for (int a=0; a < size; a++) {
for (int b=0; b < size; b++) { for (int b=0; b < size; b++) {
sum += input[i][size*j +a][size*k +b]; sum += input[i][stride*j +a][stride*k +b];
} }
} }
output[i][j][k] = sum/(float)n; output[i][j][k] = sum/(float)n;
@ -71,11 +71,11 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
#ifdef __CUDACC__ #ifdef __CUDACC__
extern "C" extern "C"
#endif #endif
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width) { void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
#ifndef __CUDACC__ #ifndef __CUDACC__
make_average_pooling_cpu(input, output, size, output_depth, output_width); make_average_pooling_cpu(input, output, size, output_depth, output_width, stride);
#else #else
make_average_pooling_device(input, output, size, output_depth, output_width); make_average_pooling_device(input, output, size, output_depth, output_width, stride);
#endif #endif
} }
@ -87,7 +87,7 @@ void make_average_pooling(float*** input, float*** output, int size, int output_
* Max Pooling * Max Pooling
*/ */
#ifdef __CUDACC__ #ifdef __CUDACC__
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width) { __global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
@ -102,25 +102,25 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
for (int a=0; a < size; a++) { for (int a=0; a < size; a++) {
for (int b=0; b < size; b++) { for (int b=0; b < size; b++) {
temp = input[idx][size*idy +a][size*idz +b]; temp = input[idx][stride*idy +a][stride*idz +b];
m = m > temp ? m : temp; // max(m, temp) m = m > temp ? m : temp; // max(m, temp)
} }
} }
output[idx][idy][idz] = m; output[idx][idy][idz] = m;
} }
void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width) { void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
// Make computation // Make computation
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z)); dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width); make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride);
gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() ); gpuErrchk( cudaDeviceSynchronize() );
} }
#endif #endif
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width) { void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
// input[output_depth][output_width+size-1][output_width+size-1] // input[output_depth][output_width+size-1][output_width+size-1]
// output[output_depth][output_width][output_width] // output[output_depth][output_width][output_width]
float m; float m;
@ -130,7 +130,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
m = -FLT_MAX; m = -FLT_MAX;
for (int a=0; a < size; a++) { for (int a=0; a < size; a++) {
for (int b=0; b < size; b++) { for (int b=0; b < size; b++) {
m = fmaxf(m, input[i][size*j +a][size*k +b]); m = fmaxf(m, input[i][stride*j +a][stride*k +b]);
} }
} }
output[i][j][k] = m; output[i][j][k] = m;
@ -142,11 +142,11 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
#ifdef __CUDACC__ #ifdef __CUDACC__
extern "C" extern "C"
#endif #endif
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width) { void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
#ifndef __CUDACC__ #ifndef __CUDACC__
make_max_pooling_cpu(input, output, size, output_depth, output_width); make_max_pooling_cpu(input, output, size, output_depth, output_width, stride);
#else #else
make_max_pooling_device(input, output, size, output_depth, output_width); make_max_pooling_device(input, output, size, output_depth, output_width, stride);
#endif #endif
} }

View File

@ -15,7 +15,7 @@
* Average Pooling * Average Pooling
*/ */
#ifdef __CUDACC__ #ifdef __CUDACC__
__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width) { __global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
@ -30,24 +30,24 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int
for (int a=0; a < size; a++) { for (int a=0; a < size; a++) {
for (int b=0; b < size; b++) { for (int b=0; b < size; b++) {
sum += input[idx][size*idy +a][size*idz +b]; sum += input[idx][stride*idy +a][stride*idz +b];
} }
} }
output[idx][idy][idz] = sum/(float)n; output[idx][idy][idz] = sum/(float)n;
} }
void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width) { void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
// Make computation // Make computation
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z)); dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width); make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride);
gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() ); gpuErrchk( cudaDeviceSynchronize() );
} }
#endif #endif
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width) { void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
// input[output_depth][output_width+size-1][output_width+size-1] // input[output_depth][output_width+size-1][output_width+size-1]
// output[output_depth][output_width][output_width] // output[output_depth][output_width][output_width]
float sum; float sum;
@ -59,7 +59,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
sum = 0; sum = 0;
for (int a=0; a < size; a++) { for (int a=0; a < size; a++) {
for (int b=0; b < size; b++) { for (int b=0; b < size; b++) {
sum += input[i][size*j +a][size*k +b]; sum += input[i][stride*j +a][stride*k +b];
} }
} }
output[i][j][k] = sum/(float)n; output[i][j][k] = sum/(float)n;
@ -71,11 +71,11 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
#ifdef __CUDACC__ #ifdef __CUDACC__
extern "C" extern "C"
#endif #endif
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width) { void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
#ifndef __CUDACC__ #ifndef __CUDACC__
make_average_pooling_cpu(input, output, size, output_depth, output_width); make_average_pooling_cpu(input, output, size, output_depth, output_width, stride);
#else #else
make_average_pooling_device(input, output, size, output_depth, output_width); make_average_pooling_device(input, output, size, output_depth, output_width, stride);
#endif #endif
} }
@ -87,7 +87,7 @@ void make_average_pooling(float*** input, float*** output, int size, int output_
* Max Pooling * Max Pooling
*/ */
#ifdef __CUDACC__ #ifdef __CUDACC__
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width) { __global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
@ -102,25 +102,25 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
for (int a=0; a < size; a++) { for (int a=0; a < size; a++) {
for (int b=0; b < size; b++) { for (int b=0; b < size; b++) {
temp = input[idx][size*idy +a][size*idz +b]; temp = input[idx][stride*idy +a][stride*idz +b];
m = m > temp ? m : temp; // max(m, temp) m = m > temp ? m : temp; // max(m, temp)
} }
} }
output[idx][idy][idz] = m; output[idx][idy][idz] = m;
} }
void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width) { void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
// Make computation // Make computation
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z)); dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width); make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride);
gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() ); gpuErrchk( cudaDeviceSynchronize() );
} }
#endif #endif
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width) { void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
// input[output_depth][output_width+size-1][output_width+size-1] // input[output_depth][output_width+size-1][output_width+size-1]
// output[output_depth][output_width][output_width] // output[output_depth][output_width][output_width]
float m; float m;
@ -130,7 +130,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
m = -FLT_MAX; m = -FLT_MAX;
for (int a=0; a < size; a++) { for (int a=0; a < size; a++) {
for (int b=0; b < size; b++) { for (int b=0; b < size; b++) {
m = fmaxf(m, input[i][size*j +a][size*k +b]); m = fmaxf(m, input[i][stride*j +a][stride*k +b]);
} }
} }
output[i][j][k] = m; output[i][j][k] = m;
@ -142,11 +142,11 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
#ifdef __CUDACC__ #ifdef __CUDACC__
extern "C" extern "C"
#endif #endif
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width) { void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
#ifndef __CUDACC__ #ifndef __CUDACC__
make_max_pooling_cpu(input, output, size, output_depth, output_width); make_max_pooling_cpu(input, output, size, output_depth, output_width, stride);
#else #else
make_max_pooling_device(input, output, size, output_depth, output_width); make_max_pooling_device(input, output, size, output_depth, output_width, stride);
#endif #endif
} }