mirror of
https://github.com/augustin64/projet-tipe
synced 2025-01-23 15:16:26 +01:00
Ajout stride dans average_ pooling et max_pooling
This commit is contained in:
parent
6e022fbd44
commit
af288166d6
@ -208,9 +208,9 @@ void forward_propagation(Network* network) {
|
|||||||
return;
|
return;
|
||||||
} else { // Pooling sur une matrice
|
} else { // Pooling sur une matrice
|
||||||
if (pooling == AVG_POOLING) {
|
if (pooling == AVG_POOLING) {
|
||||||
make_average_pooling(input, output, input_width/output_width, output_depth, output_width);
|
make_average_pooling(input, output, input_width/output_width, output_depth, output_width, input_width/output_width);
|
||||||
} else if (pooling == MAX_POOLING) {
|
} else if (pooling == MAX_POOLING) {
|
||||||
make_max_pooling(input, output, input_width/output_width, output_depth, output_width);
|
make_max_pooling(input, output, input_width/output_width, output_depth, output_width, input_width/output_width);
|
||||||
} else {
|
} else {
|
||||||
printf_error("Impossible de reconnaître le type de couche de pooling: ");
|
printf_error("Impossible de reconnaître le type de couche de pooling: ");
|
||||||
printf("identifiant: %d, position: %d\n", pooling, i);
|
printf("identifiant: %d, position: %d\n", pooling, i);
|
||||||
|
@ -19,7 +19,7 @@ extern "C"
|
|||||||
/*
|
/*
|
||||||
* Effectue un average pooling avec stride=size
|
* Effectue un average pooling avec stride=size
|
||||||
*/
|
*/
|
||||||
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_dim);
|
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride);
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
extern "C"
|
extern "C"
|
||||||
@ -27,7 +27,7 @@ extern "C"
|
|||||||
/*
|
/*
|
||||||
* Effectue un max pooling avec stride=size
|
* Effectue un max pooling avec stride=size
|
||||||
*/
|
*/
|
||||||
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_dim);
|
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride);
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
extern "C"
|
extern "C"
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
* Average Pooling
|
* Average Pooling
|
||||||
*/
|
*/
|
||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width) {
|
__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
|
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
|
||||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
|
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
|
||||||
@ -30,24 +30,24 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int
|
|||||||
|
|
||||||
for (int a=0; a < size; a++) {
|
for (int a=0; a < size; a++) {
|
||||||
for (int b=0; b < size; b++) {
|
for (int b=0; b < size; b++) {
|
||||||
sum += input[idx][size*idy +a][size*idz +b];
|
sum += input[idx][stride*idy +a][stride*idz +b];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
output[idx][idy][idz] = sum/(float)n;
|
output[idx][idy][idz] = sum/(float)n;
|
||||||
}
|
}
|
||||||
|
|
||||||
void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width) {
|
void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||||
// Make computation
|
// Make computation
|
||||||
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
|
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
|
||||||
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||||
|
|
||||||
make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width);
|
make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride);
|
||||||
gpuErrchk( cudaPeekAtLastError() );
|
gpuErrchk( cudaPeekAtLastError() );
|
||||||
gpuErrchk( cudaDeviceSynchronize() );
|
gpuErrchk( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width) {
|
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||||
// input[output_depth][output_width+size-1][output_width+size-1]
|
// input[output_depth][output_width+size-1][output_width+size-1]
|
||||||
// output[output_depth][output_width][output_width]
|
// output[output_depth][output_width][output_width]
|
||||||
float sum;
|
float sum;
|
||||||
@ -59,7 +59,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
|
|||||||
sum = 0;
|
sum = 0;
|
||||||
for (int a=0; a < size; a++) {
|
for (int a=0; a < size; a++) {
|
||||||
for (int b=0; b < size; b++) {
|
for (int b=0; b < size; b++) {
|
||||||
sum += input[i][size*j +a][size*k +b];
|
sum += input[i][stride*j +a][stride*k +b];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
output[i][j][k] = sum/(float)n;
|
output[i][j][k] = sum/(float)n;
|
||||||
@ -71,11 +71,11 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
|
|||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
extern "C"
|
extern "C"
|
||||||
#endif
|
#endif
|
||||||
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width) {
|
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||||
#ifndef __CUDACC__
|
#ifndef __CUDACC__
|
||||||
make_average_pooling_cpu(input, output, size, output_depth, output_width);
|
make_average_pooling_cpu(input, output, size, output_depth, output_width, stride);
|
||||||
#else
|
#else
|
||||||
make_average_pooling_device(input, output, size, output_depth, output_width);
|
make_average_pooling_device(input, output, size, output_depth, output_width, stride);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -87,7 +87,7 @@ void make_average_pooling(float*** input, float*** output, int size, int output_
|
|||||||
* Max Pooling
|
* Max Pooling
|
||||||
*/
|
*/
|
||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width) {
|
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
|
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
|
||||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
|
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
|
||||||
@ -102,25 +102,25 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
|
|||||||
|
|
||||||
for (int a=0; a < size; a++) {
|
for (int a=0; a < size; a++) {
|
||||||
for (int b=0; b < size; b++) {
|
for (int b=0; b < size; b++) {
|
||||||
temp = input[idx][size*idy +a][size*idz +b];
|
temp = input[idx][stride*idy +a][stride*idz +b];
|
||||||
m = m > temp ? m : temp; // max(m, temp)
|
m = m > temp ? m : temp; // max(m, temp)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
output[idx][idy][idz] = m;
|
output[idx][idy][idz] = m;
|
||||||
}
|
}
|
||||||
|
|
||||||
void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width) {
|
void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||||
// Make computation
|
// Make computation
|
||||||
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
|
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
|
||||||
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||||
|
|
||||||
make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width);
|
make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride);
|
||||||
gpuErrchk( cudaPeekAtLastError() );
|
gpuErrchk( cudaPeekAtLastError() );
|
||||||
gpuErrchk( cudaDeviceSynchronize() );
|
gpuErrchk( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width) {
|
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||||
// input[output_depth][output_width+size-1][output_width+size-1]
|
// input[output_depth][output_width+size-1][output_width+size-1]
|
||||||
// output[output_depth][output_width][output_width]
|
// output[output_depth][output_width][output_width]
|
||||||
float m;
|
float m;
|
||||||
@ -130,7 +130,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
|
|||||||
m = -FLT_MAX;
|
m = -FLT_MAX;
|
||||||
for (int a=0; a < size; a++) {
|
for (int a=0; a < size; a++) {
|
||||||
for (int b=0; b < size; b++) {
|
for (int b=0; b < size; b++) {
|
||||||
m = fmaxf(m, input[i][size*j +a][size*k +b]);
|
m = fmaxf(m, input[i][stride*j +a][stride*k +b]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
output[i][j][k] = m;
|
output[i][j][k] = m;
|
||||||
@ -142,11 +142,11 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
|
|||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
extern "C"
|
extern "C"
|
||||||
#endif
|
#endif
|
||||||
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width) {
|
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||||
#ifndef __CUDACC__
|
#ifndef __CUDACC__
|
||||||
make_max_pooling_cpu(input, output, size, output_depth, output_width);
|
make_max_pooling_cpu(input, output, size, output_depth, output_width, stride);
|
||||||
#else
|
#else
|
||||||
make_max_pooling_device(input, output, size, output_depth, output_width);
|
make_max_pooling_device(input, output, size, output_depth, output_width, stride);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
* Average Pooling
|
* Average Pooling
|
||||||
*/
|
*/
|
||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width) {
|
__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
|
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
|
||||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
|
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
|
||||||
@ -30,24 +30,24 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int
|
|||||||
|
|
||||||
for (int a=0; a < size; a++) {
|
for (int a=0; a < size; a++) {
|
||||||
for (int b=0; b < size; b++) {
|
for (int b=0; b < size; b++) {
|
||||||
sum += input[idx][size*idy +a][size*idz +b];
|
sum += input[idx][stride*idy +a][stride*idz +b];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
output[idx][idy][idz] = sum/(float)n;
|
output[idx][idy][idz] = sum/(float)n;
|
||||||
}
|
}
|
||||||
|
|
||||||
void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width) {
|
void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||||
// Make computation
|
// Make computation
|
||||||
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
|
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
|
||||||
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||||
|
|
||||||
make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width);
|
make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride);
|
||||||
gpuErrchk( cudaPeekAtLastError() );
|
gpuErrchk( cudaPeekAtLastError() );
|
||||||
gpuErrchk( cudaDeviceSynchronize() );
|
gpuErrchk( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width) {
|
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||||
// input[output_depth][output_width+size-1][output_width+size-1]
|
// input[output_depth][output_width+size-1][output_width+size-1]
|
||||||
// output[output_depth][output_width][output_width]
|
// output[output_depth][output_width][output_width]
|
||||||
float sum;
|
float sum;
|
||||||
@ -59,7 +59,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
|
|||||||
sum = 0;
|
sum = 0;
|
||||||
for (int a=0; a < size; a++) {
|
for (int a=0; a < size; a++) {
|
||||||
for (int b=0; b < size; b++) {
|
for (int b=0; b < size; b++) {
|
||||||
sum += input[i][size*j +a][size*k +b];
|
sum += input[i][stride*j +a][stride*k +b];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
output[i][j][k] = sum/(float)n;
|
output[i][j][k] = sum/(float)n;
|
||||||
@ -71,11 +71,11 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
|
|||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
extern "C"
|
extern "C"
|
||||||
#endif
|
#endif
|
||||||
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width) {
|
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||||
#ifndef __CUDACC__
|
#ifndef __CUDACC__
|
||||||
make_average_pooling_cpu(input, output, size, output_depth, output_width);
|
make_average_pooling_cpu(input, output, size, output_depth, output_width, stride);
|
||||||
#else
|
#else
|
||||||
make_average_pooling_device(input, output, size, output_depth, output_width);
|
make_average_pooling_device(input, output, size, output_depth, output_width, stride);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -87,7 +87,7 @@ void make_average_pooling(float*** input, float*** output, int size, int output_
|
|||||||
* Max Pooling
|
* Max Pooling
|
||||||
*/
|
*/
|
||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width) {
|
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
|
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
|
||||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
|
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
|
||||||
@ -102,25 +102,25 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
|
|||||||
|
|
||||||
for (int a=0; a < size; a++) {
|
for (int a=0; a < size; a++) {
|
||||||
for (int b=0; b < size; b++) {
|
for (int b=0; b < size; b++) {
|
||||||
temp = input[idx][size*idy +a][size*idz +b];
|
temp = input[idx][stride*idy +a][stride*idz +b];
|
||||||
m = m > temp ? m : temp; // max(m, temp)
|
m = m > temp ? m : temp; // max(m, temp)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
output[idx][idy][idz] = m;
|
output[idx][idy][idz] = m;
|
||||||
}
|
}
|
||||||
|
|
||||||
void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width) {
|
void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||||
// Make computation
|
// Make computation
|
||||||
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
|
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
|
||||||
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||||
|
|
||||||
make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width);
|
make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride);
|
||||||
gpuErrchk( cudaPeekAtLastError() );
|
gpuErrchk( cudaPeekAtLastError() );
|
||||||
gpuErrchk( cudaDeviceSynchronize() );
|
gpuErrchk( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width) {
|
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||||
// input[output_depth][output_width+size-1][output_width+size-1]
|
// input[output_depth][output_width+size-1][output_width+size-1]
|
||||||
// output[output_depth][output_width][output_width]
|
// output[output_depth][output_width][output_width]
|
||||||
float m;
|
float m;
|
||||||
@ -130,7 +130,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
|
|||||||
m = -FLT_MAX;
|
m = -FLT_MAX;
|
||||||
for (int a=0; a < size; a++) {
|
for (int a=0; a < size; a++) {
|
||||||
for (int b=0; b < size; b++) {
|
for (int b=0; b < size; b++) {
|
||||||
m = fmaxf(m, input[i][size*j +a][size*k +b]);
|
m = fmaxf(m, input[i][stride*j +a][stride*k +b]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
output[i][j][k] = m;
|
output[i][j][k] = m;
|
||||||
@ -142,11 +142,11 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
|
|||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
extern "C"
|
extern "C"
|
||||||
#endif
|
#endif
|
||||||
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width) {
|
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||||
#ifndef __CUDACC__
|
#ifndef __CUDACC__
|
||||||
make_max_pooling_cpu(input, output, size, output_depth, output_width);
|
make_max_pooling_cpu(input, output, size, output_depth, output_width, stride);
|
||||||
#else
|
#else
|
||||||
make_max_pooling_device(input, output, size, output_depth, output_width);
|
make_max_pooling_device(input, output, size, output_depth, output_width, stride);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user