tipe/src/cnn/convolution.cu

83 lines
2.7 KiB
Plaintext
Raw Normal View History

2022-11-01 17:24:29 +01:00
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include "include/struct.h"
2023-01-28 22:04:38 +01:00
#include "../include/utils.h"
2022-11-01 17:24:29 +01:00
#define BLOCKSIZE_x 16
#define BLOCKSIZE_y 8
#define BLOCKSIZE_z 8
/*
 * CPU implementation of the convolution.
 *
 * For every filter i and every output position (j, k), the result is the bias
 * kernel->bias[i][j][k] plus the sum, over every input channel a and every
 * filter offset (b, c), of kernel->w[a][i][b][c] * input[a][j+b][k+c].
 *
 * kernel:     provides rows (number of input channels), columns (number of
 *             filters), k_size (filter side length), bias and weights w
 * input:      indexed as input[a][j+b][k+c]; must therefore hold at least
 *             kernel->rows x (k_size + output_dim-1) x (k_size + output_dim-1) values
 * output:     written as output[kernel->columns][output_dim][output_dim]
 * output_dim: side length of each output plane
 */
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
    for (int i=0; i < kernel->columns; i++) { // filter
        for (int j=0; j < output_dim; j++) { // output row
            for (int k=0; k < output_dim; k++) { // output column
                // Start from the bias, then accumulate the weighted input window
                float f = kernel->bias[i][j][k];

                for (int a=0; a < kernel->rows; a++) { // input channel
                    for (int b=0; b < kernel->k_size; b++) { // filter row
                        for (int c=0; c < kernel->k_size; c++) { // filter column
                            f += kernel->w[a][i][b][c]*input[a][j+b][k+c];
                        }
                    }
                }

                output[i][j][k] = f;
            }
        }
    }
}
#ifdef __CUDACC__
2023-01-28 22:04:38 +01:00
/*
 * CUDA kernel computing one output value of the convolution per thread.
 *
 * The thread with global coordinates (idx, idy, idz) computes
 * output[idx][idy][idz] = kernel->bias[idx][idy][idz]
 *     + sum over a, b, c of kernel->w[a][idx][b][c] * input[a][idy+b][idz+c]
 * which is the same computation, in the same accumulation order, as the body
 * of the (i, j, k) loops of make_convolution_cpu.
 *
 * Launch layout: a 3D grid whose x dimension covers kernel->columns and whose
 * y and z dimensions cover output_dim. Threads falling outside these bounds
 * return without touching memory, so the grid may overshoot.
 *
 * All pointers (kernel and everything reachable through it, input, output)
 * are dereferenced on the device and must be device-accessible.
 */
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
    // Respective equivalents of i, j and k in the loop performed by the CPU version
    int idx = threadIdx.x + blockDim.x*blockIdx.x; // filter index, valid when < kernel->columns
    int idy = threadIdx.y + blockDim.y*blockIdx.y; // output row, valid when < output_dim
    int idz = threadIdx.z + blockDim.z*blockIdx.z; // output column, valid when < output_dim

    // The grid is rounded up to whole blocks: discard the surplus threads
    if (idx >= kernel->columns || idy >= output_dim || idz >= output_dim) {
        return;
    }

    // Start from the bias, then accumulate the weighted input window
    float f = kernel->bias[idx][idy][idz];

    for (int a=0; a < kernel->rows; a++) { // input channel
        for (int b=0; b < kernel->k_size; b++) { // filter row
            for (int c=0; c < kernel->k_size; c++) { // filter column
                f += kernel->w[a][idx][b][c]*input[a][idy+b][idz+c];
            }
        }
    }

    output[idx][idy][idz] = f;
}
/*
 * Host-side launcher: runs make_convolution_kernel over a 3D grid covering
 * (kernel->columns, output_dim, output_dim) and waits for its completion.
 *
 * kernel is read here on the host (kernel->columns) and is also dereferenced
 * by the kernel on the device, so it must be accessible from both sides.
 * NOTE(review): presumably allocated as managed/unified memory elsewhere in
 * the project -- confirm against the allocation code.
 *
 * Launch and execution errors are reported through gpuErrchk.
 */
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
    // Nothing to compute: return early, as the CPU version does by not entering
    // its loops. This also avoids a kernel launch with a zero-sized grid
    // dimension, which CUDA rejects as an invalid configuration.
    if (kernel->columns <= 0 || output_dim <= 0) {
        return;
    }

    // Round each grid dimension up so that every output element gets a thread
    dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);

    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim);

    gpuErrchk( cudaPeekAtLastError() ); // launch-time errors (bad configuration, ...)
    gpuErrchk( cudaDeviceSynchronize() ); // wait for completion; surfaces in-kernel faults
}
#endif
extern "C" {
2022-11-01 17:24:29 +01:00
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
#ifndef __CUDACC__
make_convolution_cpu(kernel, input, output, output_dim);
#else
make_convolution_device(kernel, input, output, output_dim);
#endif
}
2022-11-01 17:24:29 +01:00
}