mirror of
https://github.com/augustin64/projet-tipe
synced 2025-01-23 15:16:26 +01:00
Ajout de 'stride' dans 'make_convolution'
This commit is contained in:
parent
c23a126faa
commit
6e022fbd44
@ -189,7 +189,7 @@ void forward_propagation(Network* network) {
|
|||||||
* On copie les valeurs de output dans output_z, puis on applique la fonction d'activation à output_z
|
* On copie les valeurs de output dans output_z, puis on applique la fonction d'activation à output_z
|
||||||
*/
|
*/
|
||||||
if (k_i->cnn) { // Convolution
|
if (k_i->cnn) { // Convolution
|
||||||
make_convolution(k_i->cnn, input, output, output_width);
|
make_convolution(k_i->cnn, input, output, output_width, 1);
|
||||||
copy_3d_array(output, output_z, output_depth, output_width, output_width);
|
copy_3d_array(output, output_z, output_depth, output_width, output_width);
|
||||||
apply_function_to_matrix(activation, output, output_depth, output_width);
|
apply_function_to_matrix(activation, output, output_depth, output_width);
|
||||||
}
|
}
|
||||||
|
@ -5,24 +5,27 @@
|
|||||||
#include "include/struct.h"
|
#include "include/struct.h"
|
||||||
#include "../include/utils.h"
|
#include "../include/utils.h"
|
||||||
|
|
||||||
|
|
||||||
#include "include/config.h"
|
#include "include/config.h"
|
||||||
|
|
||||||
|
|
||||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
|
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||||
// c'est le kernel de input
|
// c'est le kernel de input
|
||||||
// input[kernel->rows][kernel_k_size + output_dim-1][kernel_k_size + output_dim-1]
|
// input[kernel->rows][kernel_k_size + stride*(output_dim-1)][kernel_k_size + stride*(output_dim-1)]
|
||||||
// output[kernel->columns][output_dim][output_dim]
|
// output[kernel->columns][output_dim][output_dim]
|
||||||
|
|
||||||
|
int k_size = kernel->k_size;
|
||||||
|
int k_columns = kernel->columns;
|
||||||
|
int k_rows = kernel->rows;
|
||||||
float f;
|
float f;
|
||||||
|
|
||||||
for (int i=0; i < kernel->columns; i++) { // filtre
|
for (int i=0; i < k_columns; i++) { // filtre
|
||||||
for (int j=0; j < output_dim; j++) { // ligne de sortie
|
for (int j=0; j < output_dim; j++) { // ligne de sortie
|
||||||
for (int k=0; k < output_dim; k++) { // colonne de sortie
|
for (int k=0; k < output_dim; k++) { // colonne de sortie
|
||||||
f = kernel->bias[i][j][k];
|
f = kernel->bias[i][j][k];
|
||||||
for (int a=0; a < kernel->rows; a++) { // Canal de couleur
|
for (int a=0; a < k_rows; a++) { // Canal de couleur
|
||||||
for (int b=0; b < kernel->k_size; b++) { // ligne du filtre
|
for (int b=0; b < k_size; b++) { // ligne du filtre
|
||||||
for (int c=0; c < kernel->k_size; c++) { // colonne du filtre
|
for (int c=0; c < k_size; c++) { // colonne du filtre
|
||||||
f += kernel->weights[a][i][b][c]*input[a][j+b][k+c];
|
f += kernel->weights[a][i][b][c]*input[a][stride*j+b][stride*k+c];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -34,7 +37,7 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
|
|||||||
|
|
||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
|
|
||||||
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
|
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
|
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
|
||||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
|
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
|
||||||
@ -49,7 +52,7 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
|
|||||||
for (int a=0; a < kernel->rows; a++) {
|
for (int a=0; a < kernel->rows; a++) {
|
||||||
for (int b=0; b < kernel->k_size; b++) {
|
for (int b=0; b < kernel->k_size; b++) {
|
||||||
for (int c=0; c < kernel->k_size; c++) {
|
for (int c=0; c < kernel->k_size; c++) {
|
||||||
f += kernel->weights[a][idx][b][c]*input[a][idy+b][idz+c];
|
f += kernel->weights[a][idx][b][c]*input[a][idy*stride+b][idz*stride+c];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -57,21 +60,21 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
|
|||||||
output[idx][idy][idz] = f;
|
output[idx][idy][idz] = f;
|
||||||
}
|
}
|
||||||
|
|
||||||
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
|
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||||
// Make computation
|
// Make computation
|
||||||
dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
|
dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
|
||||||
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||||
|
|
||||||
make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim);
|
make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim, stride);
|
||||||
gpuErrchk( cudaPeekAtLastError() );
|
gpuErrchk( cudaPeekAtLastError() );
|
||||||
gpuErrchk( cudaDeviceSynchronize() );
|
gpuErrchk( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
|
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||||
#ifndef __CUDACC__
|
#ifndef __CUDACC__
|
||||||
make_convolution_cpu(kernel, input, output, output_dim);
|
make_convolution_cpu(kernel, input, output, output_dim, stride);
|
||||||
#else
|
#else
|
||||||
make_convolution_device(kernel, input, output, output_dim);
|
make_convolution_device(kernel, input, output, output_dim, stride);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
@ -8,20 +8,24 @@
|
|||||||
#include "include/config.h"
|
#include "include/config.h"
|
||||||
|
|
||||||
|
|
||||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
|
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||||
// c'est le kernel de input
|
// c'est le kernel de input
|
||||||
// input[kernel->rows][kernel_k_size + output_dim-1][kernel_k_size + output_dim-1]
|
// input[kernel->rows][kernel_k_size + stride*(output_dim-1)][kernel_k_size + stride*(output_dim-1)]
|
||||||
// output[kernel->columns][output_dim][output_dim]
|
// output[kernel->columns][output_dim][output_dim]
|
||||||
|
|
||||||
|
int k_size = kernel->k_size;
|
||||||
|
int k_columns = kernel->columns;
|
||||||
|
int k_rows = kernel->rows;
|
||||||
float f;
|
float f;
|
||||||
|
|
||||||
for (int i=0; i < kernel->columns; i++) { // filtre
|
for (int i=0; i < k_columns; i++) { // filtre
|
||||||
for (int j=0; j < output_dim; j++) { // ligne de sortie
|
for (int j=0; j < output_dim; j++) { // ligne de sortie
|
||||||
for (int k=0; k < output_dim; k++) { // colonne de sortie
|
for (int k=0; k < output_dim; k++) { // colonne de sortie
|
||||||
f = kernel->bias[i][j][k];
|
f = kernel->bias[i][j][k];
|
||||||
for (int a=0; a < kernel->rows; a++) { // Canal de couleur
|
for (int a=0; a < k_rows; a++) { // Canal de couleur
|
||||||
for (int b=0; b < kernel->k_size; b++) { // ligne du filtre
|
for (int b=0; b < k_size; b++) { // ligne du filtre
|
||||||
for (int c=0; c < kernel->k_size; c++) { // colonne du filtre
|
for (int c=0; c < k_size; c++) { // colonne du filtre
|
||||||
f += kernel->weights[a][i][b][c]*input[a][j+b][k+c];
|
f += kernel->weights[a][i][b][c]*input[a][stride*j+b][stride*k+c];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -33,7 +37,7 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
|
|||||||
|
|
||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
|
|
||||||
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
|
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
|
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
|
||||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
|
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
|
||||||
@ -48,7 +52,7 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
|
|||||||
for (int a=0; a < kernel->rows; a++) {
|
for (int a=0; a < kernel->rows; a++) {
|
||||||
for (int b=0; b < kernel->k_size; b++) {
|
for (int b=0; b < kernel->k_size; b++) {
|
||||||
for (int c=0; c < kernel->k_size; c++) {
|
for (int c=0; c < kernel->k_size; c++) {
|
||||||
f += kernel->weights[a][idx][b][c]*input[a][idy+b][idz+c];
|
f += kernel->weights[a][idx][b][c]*input[a][idy*stride+b][idz*stride+c];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -56,22 +60,21 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
|
|||||||
output[idx][idy][idz] = f;
|
output[idx][idy][idz] = f;
|
||||||
}
|
}
|
||||||
|
|
||||||
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
|
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||||
// Make computation
|
// Make computation
|
||||||
dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
|
dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
|
||||||
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||||
|
|
||||||
make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim);
|
make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim, stride);
|
||||||
gpuErrchk( cudaPeekAtLastError() );
|
gpuErrchk( cudaPeekAtLastError() );
|
||||||
gpuErrchk( cudaDeviceSynchronize() );
|
gpuErrchk( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern "C"
|
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||||
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
|
|
||||||
#ifndef __CUDACC__
|
#ifndef __CUDACC__
|
||||||
make_convolution_cpu(kernel, input, output, output_dim);
|
make_convolution_cpu(kernel, input, output, output_dim, stride);
|
||||||
#else
|
#else
|
||||||
make_convolution_device(kernel, input, output, output_dim);
|
make_convolution_device(kernel, input, output, output_dim, stride);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
@ -3,21 +3,21 @@
|
|||||||
/*
|
/*
|
||||||
* Effectue la convolution naïvement sur le processeur
|
* Effectue la convolution naïvement sur le processeur
|
||||||
*/
|
*/
|
||||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim);
|
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
/*
|
/*
|
||||||
* Kernel de la convolution sur carte graphique
|
* Kernel de la convolution sur carte graphique
|
||||||
*/
|
*/
|
||||||
__global__ void make_convolution_kernel(int k_size, int columns, int rows, float* bias, size_t pitch_bias, float**** weights, size_t pitch_weights, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_dim);
|
__global__ void make_convolution_kernel(int k_size, int columns, int rows, float* bias, size_t pitch_bias, float**** weights, size_t pitch_weights, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_dim, int stride);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Effectue la convolution naïvement sur la carte graphique
|
* Effectue la convolution naïvement sur la carte graphique
|
||||||
*/
|
*/
|
||||||
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim);
|
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Détermine si la convolution peut être faite sur la carte graphique au moment de la compilation
|
* Détermine si la convolution peut être faite sur la carte graphique au moment de la compilation
|
||||||
*/
|
*/
|
||||||
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim);
|
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
|
@ -6,12 +6,12 @@
|
|||||||
/*
|
/*
|
||||||
* Effectue une convolution sans stride sur le processeur
|
* Effectue une convolution de pas 'stride' sur le processeur
|
||||||
*/
|
*/
|
||||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim);
|
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Effectue la convolution sur le CPU ou GPU
|
* Effectue la convolution sur le CPU ou GPU
|
||||||
*/
|
*/
|
||||||
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim);
|
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
extern "C"
|
extern "C"
|
||||||
|
@ -157,7 +157,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
|
|||||||
double cpu_time_used, gpu_time_used;
|
double cpu_time_used, gpu_time_used;
|
||||||
|
|
||||||
start = clock();
|
start = clock();
|
||||||
make_convolution_device(kernel, input, output_gpu, output_dim);
|
make_convolution_device(kernel, input, output_gpu, output_dim, 1);
|
||||||
end = clock();
|
end = clock();
|
||||||
|
|
||||||
gpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
|
gpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
|
||||||
@ -165,7 +165,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
|
|||||||
|
|
||||||
|
|
||||||
start = clock();
|
start = clock();
|
||||||
make_convolution_cpu(kernel, input, output_cpu, output_dim);
|
make_convolution_cpu(kernel, input, output_cpu, output_dim, 1);
|
||||||
end = clock();
|
end = clock();
|
||||||
|
|
||||||
cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
|
cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
|
||||||
|
@ -140,7 +140,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
|
|||||||
double cpu_time_used, gpu_time_used;
|
double cpu_time_used, gpu_time_used;
|
||||||
|
|
||||||
start_time = omp_get_wtime();
|
start_time = omp_get_wtime();
|
||||||
make_convolution_device(kernel, input, output_gpu, output_dim);
|
make_convolution_device(kernel, input, output_gpu, output_dim, 1);
|
||||||
end_time = omp_get_wtime();
|
end_time = omp_get_wtime();
|
||||||
|
|
||||||
|
|
||||||
@ -149,7 +149,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
|
|||||||
|
|
||||||
|
|
||||||
start_time = omp_get_wtime();
|
start_time = omp_get_wtime();
|
||||||
make_convolution_cpu(kernel, input, output_cpu, output_dim);
|
make_convolution_cpu(kernel, input, output_cpu, output_dim, 1);
|
||||||
end_time = omp_get_wtime();
|
end_time = omp_get_wtime();
|
||||||
|
|
||||||
cpu_time_used = end_time - start_time;
|
cpu_time_used = end_time - start_time;
|
||||||
|
Loading…
Reference in New Issue
Block a user