#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>

#include "include/struct.h"
#include "../common/include/utils.h"
#include "include/config.h"
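
// Returns a non-zero value when (x, y) lies inside [lower_bound, upper_bound)
// in both dimensions, i.e. when the corresponding convolution access stays
// within the input plane.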
#ifdef __CUDACC__
__host__ __device__
#endif
int convolution_not_outside(int x, int y, int lower_bound, int upper_bound) {
    return !(x < lower_bound || y < lower_bound || x >= upper_bound || y >= upper_bound);
}
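
// CPU forward convolution: for each output feature map i and each output
// position (j, k), accumulates bias[i][j][k] plus the weighted sum of the
// input values covered by the k_size x k_size filter, taking stride and
// zero-padding into account (out-of-bounds positions contribute nothing).
// The input is expected to have shape [kernel->rows][input_width][input_width],
// with input_width = output_width*stride - 2*padding + kernel->k_size - stride
// (the inverse of output_width = (input_width - k_size + 2*padding)/stride + 1).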
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) {
    // kernel is the convolution kernel applied to input
    // input[kernel->rows][input_width][input_width]
    // output[kernel->columns][output_width][output_width]

    int k_columns = kernel->columns;
    int k_rows = kernel->rows;
    int max_move = kernel->k_size - padding;
    int input_width = output_width*stride - 2*padding + kernel->k_size - stride;
    float f;

    for (int i=0; i < k_columns; i++) { // Filter
        for (int j=0; j < output_width; j++) { // Output row
            for (int k=0; k < output_width; k++) { // Output column
                f = kernel->bias[i][j][k];
                for (int a=0; a < k_rows; a++) { // Input channel
                    for (int b=-padding; b < max_move; b++) { // Filter row
                        for (int c=-padding; c < max_move; c++) { // Filter column
                            int x = stride*j + b;
                            int y = stride*k + c;
                            if (convolution_not_outside(x, y, 0, input_width)) {
                                f += kernel->weights[a][i][b][c]*input[a][x][y];
                            }
                        }
                    }
                }
                output[i][j][k] = f;
            }
        }
    }
}
#ifdef __CUDACC__
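
// CUDA kernel: each thread computes a single output element output[idx][idy][idz],
// mirroring the (i, j, k) loops of make_convolution_cpu; threads that fall
// outside the output volume return immediately.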
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) {
    // Respective equivalents of i, j and k in the loop performed by the CPU
    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width

    int max_move = kernel->k_size - padding;
    int input_width = output_width*stride - 2*padding + kernel->k_size - stride;

    if (idx >= kernel->columns || idy >= output_width || idz >= output_width) {
        return;
    }

    float f = kernel->bias[idx][idy][idz];

    for (int a=0; a < kernel->rows; a++) { // Input channel
        for (int b=-padding; b < max_move; b++) { // Filter row
            for (int c=-padding; c < max_move; c++) { // Filter column
                int idy_2 = idy*stride + b;
                int idz_2 = idz*stride + c;
                if (convolution_not_outside(idy_2, idz_2, 0, input_width)) {
                    f += kernel->weights[a][idx][b][c]*input[a][idy_2][idz_2];
                }
            }
        }
    }

    output[idx][idy][idz] = f;
}
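
// GPU wrapper: launches make_convolution_kernel with one thread per output
// element, using the BLOCKSIZE_* constants and the i_div_up/gpuErrchk helpers
// provided by the included project headers, then waits for completion.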
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) {
    // Launch the computation on the GPU
    dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);

    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_width, stride, padding);

    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
}
#endif
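
// Public entry point: dispatches to the CPU or GPU implementation depending on
// whether the file is compiled with nvcc. extern "C" keeps the symbol callable
// from plain C code when the CUDA (C++) compiler is used.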
#ifdef __CUDACC__
extern "C"
#endif
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) {
#ifndef __CUDACC__
    make_convolution_cpu(kernel, input, output, output_width, stride, padding);
#else
    make_convolution_device(kernel, input, output, output_width, stride, padding);
#endif
}