2023-02-15 11:42:24 +01:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <float.h>
|
2023-03-09 14:27:23 +01:00
|
|
|
#include <math.h>
|
2023-02-15 11:42:24 +01:00
|
|
|
|
2023-05-12 16:16:34 +02:00
|
|
|
#include "../common/include/colors.h"
|
|
|
|
#include "../common/include/utils.h"
|
2023-02-15 11:42:24 +01:00
|
|
|
#include "include/convolution.h"
|
|
|
|
|
|
|
|
#include "include/make.h"
|
|
|
|
|
2023-03-28 12:54:49 +02:00
|
|
|
#include "include/config.h"
|
2023-02-15 11:42:24 +01:00
|
|
|
|
2023-05-13 13:37:46 +02:00
|
|
|
int pooling_not_outside(int x, int y, int lower_bound, int upper_bound) {
|
|
|
|
return !(x < lower_bound || y < lower_bound || x >= upper_bound || y>= upper_bound);
|
|
|
|
}
|
2023-02-15 11:42:24 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Average Pooling
|
|
|
|
*/
|
|
|
|
#ifdef __CUDACC__
|
2023-05-13 13:37:46 +02:00
|
|
|
__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
|
2023-02-15 11:42:24 +01:00
|
|
|
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
|
|
|
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
|
2023-03-03 21:59:51 +01:00
|
|
|
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
|
|
|
|
int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width
|
2023-05-13 13:37:46 +02:00
|
|
|
int max_move = size - padding;
|
2023-05-13 15:39:22 +02:00
|
|
|
int input_dim = output_width*stride - 2*padding + size - stride;
|
2023-02-15 11:42:24 +01:00
|
|
|
|
2023-03-03 21:59:51 +01:00
|
|
|
if (idx >= output_depth || idy >= output_width || idz >= output_width) {
|
2023-02-15 11:42:24 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2023-05-13 13:37:46 +02:00
|
|
|
int nb_elements = 0;
|
2023-02-27 18:53:13 +01:00
|
|
|
float sum = 0;
|
2023-02-15 11:42:24 +01:00
|
|
|
|
2023-05-13 13:37:46 +02:00
|
|
|
for (int a=-padding; a < max_move; a++) {
|
|
|
|
for (int b=-padding; b < max_move; b++) {
|
|
|
|
int idy_2 = stride*idy +a;
|
|
|
|
int idz_2 = stride*idz +b;
|
2023-05-13 15:39:22 +02:00
|
|
|
if (pooling_not_outside(idy_2, idz_2, 0, input_dim)) {
|
2023-05-13 13:37:46 +02:00
|
|
|
sum += input[idx][idy_2][idz_2];
|
2023-05-13 15:39:22 +02:00
|
|
|
nb_elements++;
|
2023-05-13 13:37:46 +02:00
|
|
|
}
|
2023-02-15 11:42:24 +01:00
|
|
|
}
|
|
|
|
}
|
2023-05-13 13:37:46 +02:00
|
|
|
output[idx][idy][idz] = sum/(float)nb_elements;
|
2023-02-15 11:42:24 +01:00
|
|
|
}
|
|
|
|
|
2023-05-13 13:37:46 +02:00
|
|
|
void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
|
2023-02-15 11:42:24 +01:00
|
|
|
// Make computation
|
2023-03-03 21:59:51 +01:00
|
|
|
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
|
2023-02-15 11:42:24 +01:00
|
|
|
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
|
|
|
|
2023-05-13 13:37:46 +02:00
|
|
|
make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride, padding);
|
2023-02-15 11:42:24 +01:00
|
|
|
gpuErrchk( cudaPeekAtLastError() );
|
|
|
|
gpuErrchk( cudaDeviceSynchronize() );
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2023-05-13 13:37:46 +02:00
|
|
|
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
|
2023-03-03 21:59:51 +01:00
|
|
|
// input[output_depth][output_width+size-1][output_width+size-1]
|
|
|
|
// output[output_depth][output_width][output_width]
|
2023-05-13 13:37:46 +02:00
|
|
|
int max_move = size - padding;
|
2023-05-13 15:39:22 +02:00
|
|
|
int input_dim = output_width*stride - 2*padding + size - stride;
|
2023-02-15 11:42:24 +01:00
|
|
|
|
|
|
|
for (int i=0; i < output_depth; i++) {
|
2023-03-03 21:59:51 +01:00
|
|
|
for (int j=0; j < output_width; j++) {
|
|
|
|
for (int k=0; k < output_width; k++) {
|
2023-05-13 13:37:46 +02:00
|
|
|
float sum = 0.;
|
|
|
|
int nb_elements = 0;
|
|
|
|
for (int a=-padding; a < max_move; a++) {
|
|
|
|
for (int b=-padding; b < max_move; b++) {
|
|
|
|
int j_2 = stride*j +a;
|
|
|
|
int k_2 = stride*k +b;
|
2023-05-13 15:39:22 +02:00
|
|
|
if (pooling_not_outside(j_2, k_2, 0, input_dim)) {
|
2023-05-13 13:37:46 +02:00
|
|
|
sum += input[i][j_2][k_2];
|
|
|
|
nb_elements++;
|
|
|
|
}
|
2023-02-15 11:42:24 +01:00
|
|
|
}
|
|
|
|
}
|
2023-05-13 13:37:46 +02:00
|
|
|
output[i][j][k] = sum/(float)nb_elements;
|
2023-02-15 11:42:24 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
extern "C"
|
|
|
|
#endif
|
2023-05-13 13:37:46 +02:00
|
|
|
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
|
2023-02-15 11:42:24 +01:00
|
|
|
#ifndef __CUDACC__
|
2023-05-13 13:37:46 +02:00
|
|
|
make_average_pooling_cpu(input, output, size, output_depth, output_width, stride, padding);
|
2023-02-15 11:42:24 +01:00
|
|
|
#else
|
2023-05-13 13:37:46 +02:00
|
|
|
make_average_pooling_device(input, output, size, output_depth, output_width, stride, padding);
|
2023-02-15 11:42:24 +01:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Max Pooling
|
|
|
|
*/
|
|
|
|
#ifdef __CUDACC__
|
2023-05-13 13:37:46 +02:00
|
|
|
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
|
2023-02-15 11:42:24 +01:00
|
|
|
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
|
|
|
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
|
2023-03-03 21:59:51 +01:00
|
|
|
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
|
|
|
|
int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width
|
2023-05-13 15:39:22 +02:00
|
|
|
int input_dim = output_width*stride - 2*padding + size - stride;
|
2023-02-15 11:42:24 +01:00
|
|
|
|
2023-03-03 21:59:51 +01:00
|
|
|
if (idx >= output_depth || idy >= output_width || idz >= output_width) {
|
2023-02-15 11:42:24 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2023-05-13 13:37:46 +02:00
|
|
|
int max_move = size - padding;
|
2023-03-10 18:20:10 +01:00
|
|
|
float m = -FLT_MAX;
|
2023-02-15 11:42:24 +01:00
|
|
|
float temp;
|
|
|
|
|
2023-05-13 13:37:46 +02:00
|
|
|
for (int a=-padding; a < max_move; a++) {
|
|
|
|
for (int b=-padding; b < max_move; b++) {
|
|
|
|
int idy_2 = stride*idy +a;
|
|
|
|
int idz_2 = stride*idz +b;
|
2023-05-13 15:39:22 +02:00
|
|
|
if (pooling_not_outside(idy_2, idz_2, 0, input_dim)) {
|
2023-05-13 13:37:46 +02:00
|
|
|
temp = input[idx][idy_2][idz_2];
|
|
|
|
m = m > temp ? m : temp; // max(m, temp)
|
|
|
|
}
|
2023-02-15 11:42:24 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
output[idx][idy][idz] = m;
|
|
|
|
}
|
|
|
|
|
2023-05-13 13:37:46 +02:00
|
|
|
void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
|
2023-02-15 11:42:24 +01:00
|
|
|
// Make computation
|
2023-03-03 21:59:51 +01:00
|
|
|
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
|
2023-02-15 11:42:24 +01:00
|
|
|
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
|
|
|
|
2023-05-13 13:37:46 +02:00
|
|
|
make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride, int padding);
|
2023-02-15 11:42:24 +01:00
|
|
|
gpuErrchk( cudaPeekAtLastError() );
|
|
|
|
gpuErrchk( cudaDeviceSynchronize() );
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2023-05-13 13:37:46 +02:00
|
|
|
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
|
2023-03-03 21:59:51 +01:00
|
|
|
// input[output_depth][output_width+size-1][output_width+size-1]
|
|
|
|
// output[output_depth][output_width][output_width]
|
2023-05-13 13:37:46 +02:00
|
|
|
int max_move = size - padding;
|
2023-05-13 15:39:22 +02:00
|
|
|
int input_dim = output_width*stride - 2*padding + size - stride;
|
2023-02-15 11:42:24 +01:00
|
|
|
float m;
|
|
|
|
for (int i=0; i < output_depth; i++) {
|
2023-03-03 21:59:51 +01:00
|
|
|
for (int j=0; j < output_width; j++) {
|
|
|
|
for (int k=0; k < output_width; k++) {
|
2023-03-10 18:20:10 +01:00
|
|
|
m = -FLT_MAX;
|
2023-05-13 13:37:46 +02:00
|
|
|
for (int a=-padding; a < max_move; a++) {
|
|
|
|
for (int b=-padding; b < max_move; b++) {
|
|
|
|
int j_2 = stride*j +a;
|
|
|
|
int k_2 = stride*k +b;
|
2023-05-13 15:39:22 +02:00
|
|
|
if (pooling_not_outside(j_2, k_2, 0, input_dim)) {
|
2023-05-13 13:37:46 +02:00
|
|
|
m = fmaxf(m, input[i][j_2][k_2]);
|
|
|
|
}
|
2023-02-15 11:42:24 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
output[i][j][k] = m;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
extern "C"
|
|
|
|
#endif
|
2023-05-13 13:37:46 +02:00
|
|
|
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding) {
|
2023-02-15 11:42:24 +01:00
|
|
|
#ifndef __CUDACC__
|
2023-05-13 13:37:46 +02:00
|
|
|
make_max_pooling_cpu(input, output, size, output_depth, output_width, stride, padding);
|
2023-02-15 11:42:24 +01:00
|
|
|
#else
|
2023-05-13 13:37:46 +02:00
|
|
|
make_max_pooling_device(input, output, size, output_depth, output_width, stride, padding);
|
2023-02-15 11:42:24 +01:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Dense
|
|
|
|
*/
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
__global__ void make_dense_kernel(Kernel_nn* kernel, float* input, float* output, int size_input, int size_output) {
|
|
|
|
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
|
|
|
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output
|
|
|
|
|
|
|
|
if (idx >= size_output) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
float f = kernel->bias[idx];
|
|
|
|
|
|
|
|
for (int j=0; j < size_input; j++) {
|
|
|
|
f += kernel->weights[j][idx]*input[j];
|
|
|
|
}
|
|
|
|
output[idx] = f;
|
|
|
|
}
|
|
|
|
|
|
|
|
void make_dense_device(Kernel_nn* kernel, float* input, float* output, int size_input, int size_output) {
|
|
|
|
// Make computation
|
|
|
|
dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1);
|
|
|
|
dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z);
|
|
|
|
|
|
|
|
make_dense_kernel<<<gridSize, blockSize>>>(kernel, input, output, size_input, size_output);
|
|
|
|
gpuErrchk( cudaPeekAtLastError() );
|
|
|
|
gpuErrchk( cudaDeviceSynchronize() );
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
extern "C"
|
|
|
|
#endif
|
|
|
|
void make_dense_cpu(Kernel_nn* kernel, float* input, float* output, int size_input, int size_output) {
|
|
|
|
// input[size_input]
|
|
|
|
// output[size_output]
|
|
|
|
float f;
|
|
|
|
|
|
|
|
for (int i=0; i < size_output; i++) {
|
|
|
|
f = kernel->bias[i];
|
|
|
|
for (int j=0; j < size_input; j++) {
|
|
|
|
f += kernel->weights[j][i]*input[j];
|
|
|
|
}
|
|
|
|
output[i] = f;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
extern "C"
|
|
|
|
#endif
|
|
|
|
void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input, int size_output) {
|
|
|
|
#ifndef __CUDACC__
|
|
|
|
make_dense_cpu(kernel, input, output, size_input, size_output);
|
|
|
|
#else
|
|
|
|
make_dense_device(kernel, input, output, size_input, size_output);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2023-02-28 11:47:57 +01:00
|
|
|
* Dense linearized
|
2023-02-15 11:42:24 +01:00
|
|
|
*/
|
|
|
|
#ifdef __CUDACC__
|
2023-02-28 11:47:57 +01:00
|
|
|
__global__ void make_dense_linearized_kernel(float** weights, float* bias, float*** input, float* output, int depth_input, int dim_input, int size_output) {
|
2023-02-15 11:42:24 +01:00
|
|
|
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
|
|
|
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output
|
|
|
|
|
|
|
|
if (idx >= size_output) {
|
|
|
|
return;
|
|
|
|
}
|
2023-02-28 11:47:57 +01:00
|
|
|
float f = bias[idx];
|
2023-02-15 11:42:24 +01:00
|
|
|
|
|
|
|
for (int i=0; i < depth_input; i++) {
|
|
|
|
for (int j=0; j < dim_input; j++) {
|
|
|
|
for (int k=0; k < dim_input; k++) {
|
2023-02-22 15:08:14 +01:00
|
|
|
f += input[i][j][k]*weights[k + j*dim_input + i*depth_input][idx];
|
2023-02-15 11:42:24 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
output[idx] = f;
|
|
|
|
}
|
|
|
|
|
2023-02-28 11:47:57 +01:00
|
|
|
void make_dense_linearized_device(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) {
|
2023-02-15 11:42:24 +01:00
|
|
|
// Make computation
|
|
|
|
dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1);
|
|
|
|
dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z);
|
|
|
|
|
2023-02-28 11:47:57 +01:00
|
|
|
make_dense_linearized_kernel<<<gridSize, blockSize>>>(kernel->weights, kernel->bias, input, output, depth_input, dim_input, size_output);
|
2023-02-15 11:42:24 +01:00
|
|
|
gpuErrchk( cudaPeekAtLastError() );
|
|
|
|
gpuErrchk( cudaDeviceSynchronize() );
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2023-02-28 11:47:57 +01:00
|
|
|
void make_dense_linearized_cpu(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) {
|
2023-02-15 11:42:24 +01:00
|
|
|
// input[depth_input][dim_input][dim_input]
|
|
|
|
// output[size_output]
|
|
|
|
float f;
|
|
|
|
|
|
|
|
for (int l=0; l < size_output; l++) {
|
2023-02-28 11:47:57 +01:00
|
|
|
f = kernel->bias[l];
|
2023-02-15 11:42:24 +01:00
|
|
|
for (int i=0; i < depth_input; i++) {
|
|
|
|
for (int j=0; j < dim_input; j++) {
|
|
|
|
for (int k=0; k < dim_input; k++) {
|
|
|
|
f += input[i][j][k]*kernel->weights[k + j*dim_input + i*depth_input][l];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
output[l] = f;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
extern "C"
|
|
|
|
#endif
|
2023-02-28 11:47:57 +01:00
|
|
|
void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) {
|
2023-02-15 11:42:24 +01:00
|
|
|
#ifndef __CUDACC__
|
2023-02-28 11:47:57 +01:00
|
|
|
make_dense_linearized_cpu(kernel, input, output, depth_input, dim_input, size_output);
|
2023-02-15 11:42:24 +01:00
|
|
|
#else
|
2023-02-28 11:47:57 +01:00
|
|
|
make_dense_linearized_device(kernel, input, output, depth_input, dim_input, size_output);
|
2023-02-15 11:42:24 +01:00
|
|
|
#endif
|
|
|
|
}
|