mirror of
https://github.com/augustin64/projet-tipe
synced 2025-01-23 15:16:26 +01:00
Implement copy_3d_array in CUDA
This commit is contained in:
parent
57954a27c0
commit
2d6b4fe011
@ -12,6 +12,7 @@
|
|||||||
#include "include/make.h"
|
#include "include/make.h"
|
||||||
|
|
||||||
#include "../include/colors.h"
|
#include "../include/colors.h"
|
||||||
|
#include "../include/utils.h"
|
||||||
#include "include/cnn.h"
|
#include "include/cnn.h"
|
||||||
|
|
||||||
// Augmente les dimensions de l'image d'entrée
|
// Augmente les dimensions de l'image d'entrée
|
||||||
@ -188,7 +189,7 @@ void forward_propagation(Network* network) {
|
|||||||
*/
|
*/
|
||||||
if (k_i->cnn) { // Convolution
|
if (k_i->cnn) { // Convolution
|
||||||
make_convolution(k_i->cnn, input, output, output_width);
|
make_convolution(k_i->cnn, input, output, output_width);
|
||||||
copy_input_to_input_z(output, output_z, output_depth, output_width, output_width);
|
copy_3d_array(output, output_z, output_depth, output_width, output_width);
|
||||||
apply_function_to_matrix(activation, output, output_depth, output_width);
|
apply_function_to_matrix(activation, output, output_depth, output_width);
|
||||||
}
|
}
|
||||||
else if (k_i->nn) { // Full connection
|
else if (k_i->nn) { // Full connection
|
||||||
@ -197,7 +198,7 @@ void forward_propagation(Network* network) {
|
|||||||
} else { // Matrice -> Vecteur
|
} else { // Matrice -> Vecteur
|
||||||
make_dense_linearized(k_i->nn, input, output[0][0], input_depth, input_width, output_width);
|
make_dense_linearized(k_i->nn, input, output[0][0], input_depth, input_width, output_width);
|
||||||
}
|
}
|
||||||
copy_input_to_input_z(output, output_z, 1, 1, output_width);
|
copy_3d_array(output, output_z, 1, 1, output_width);
|
||||||
apply_function_to_vector(activation, output, output_width);
|
apply_function_to_vector(activation, output, output_width);
|
||||||
}
|
}
|
||||||
else { // Pooling
|
else { // Pooling
|
||||||
@ -214,7 +215,7 @@ void forward_propagation(Network* network) {
|
|||||||
printf("identifiant: %d, position: %d\n", pooling, i);
|
printf("identifiant: %d, position: %d\n", pooling, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
copy_input_to_input_z(output, output_z, output_depth, output_width, output_width);
|
copy_3d_array(output, output_z, output_depth, output_width, output_width);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -281,16 +282,6 @@ void drop_neurones(float*** input, int depth, int dim1, int dim2, int dropout) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
 * Copy output[i][j][k] into output_z[i][j][k] for every
 * i < output_depth, j < output_rows and k < output_columns.
 * Both arrays are walked as nested pointer tables; nothing is read or
 * written when any of the three sizes is zero or negative.
 */
void copy_input_to_input_z(float*** output, float*** output_z, int output_depth, int output_rows, int output_columns) {
    int layer = 0;
    while (layer < output_depth) {
        int row = 0;
        while (row < output_rows) {
            int col = 0;
            while (col < output_columns) {
                const float value = output[layer][row][col];
                output_z[layer][row][col] = value;
                col++;
            }
            row++;
        }
        layer++;
    }
}
|
|
||||||
|
|
||||||
float compute_mean_squared_error(float* output, float* wanted_output, int len) {
|
float compute_mean_squared_error(float* output, float* wanted_output, int len) {
|
||||||
/*
|
/*
|
||||||
* $E = \frac{ \sum_{i=0}^n (output_i - desired output_i)^2 }{n}$
|
* $E = \frac{ \sum_{i=0}^n (output_i - desired output_i)^2 }{n}$
|
||||||
|
@ -38,4 +38,11 @@ extern "C"
|
|||||||
*/
|
*/
|
||||||
bool check_cuda_compatibility();
|
bool check_cuda_compatibility();
|
||||||
|
|
||||||
|
#ifdef __CUDACC__
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
/*
|
||||||
|
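* Copy the values of a 3-dimensional array from source to dest; in CUDA builds the arrays are dereferenced inside a kernel, so they must live in memory accessible from both host and device (this is not CUDA __shared__ memory)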
|
||||||
|
*/
|
||||||
|
void copy_3d_array(float*** source, float*** dest, int dimension1, int dimension2, int dimension3);
|
||||||
#endif
|
#endif
|
38
src/utils.c
38
src/utils.c
@ -11,6 +11,10 @@
|
|||||||
|
|
||||||
#include "include/utils.h"
|
#include "include/utils.h"
|
||||||
|
|
||||||
|
#define BLOCKSIZE_x 16
|
||||||
|
#define BLOCKSIZE_y 8
|
||||||
|
#define BLOCKSIZE_z 8
|
||||||
|
|
||||||
|
|
||||||
int i_div_up(int a, int b) { // Partie entière supérieure de a/b
|
int i_div_up(int a, int b) { // Partie entière supérieure de a/b
|
||||||
return ((a % b) != 0) ? (a / b + 1) : (a / b);
|
return ((a % b) != 0) ? (a / b + 1) : (a / b);
|
||||||
@ -55,3 +59,37 @@ bool check_cuda_compatibility() {
|
|||||||
return false;
|
return false;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __CUDACC__
|
||||||
|
/*
 * Kernel: each thread copies at most one element source[i][j][k] to dest[i][j][k].
 *
 * Thread-to-index mapping:
 *   x -> k, innermost index  (< dimension3)
 *   y -> j                   (< dimension2)
 *   z -> i, outermost index  (< dimension1)
 * threadIdx.x varies fastest inside a warp, so mapping it to the innermost
 * index makes neighbouring threads access neighbouring floats of the same
 * row instead of one float in many different rows.
 *
 * source, dest and every pointer table they reference are dereferenced on
 * the device, so they must all be device-accessible.
 */
__global__ void copy_3d_array_kernel(float*** source, float*** dest, int dimension1, int dimension2, int dimension3) {
    int k = threadIdx.x + blockDim.x*blockIdx.x; // < dimension3
    int j = threadIdx.y + blockDim.y*blockIdx.y; // < dimension2
    int i = threadIdx.z + blockDim.z*blockIdx.z; // < dimension1

    // The grid is rounded up to whole blocks: surplus threads have nothing to copy
    if (i >= dimension1 || j >= dimension2 || k >= dimension3) {
        return;
    }

    dest[i][j][k] = source[i][j][k];
}

/*
 * Copy a dimension1 x dimension2 x dimension3 array from source to dest on the GPU.
 *
 * Launches copy_3d_array_kernel with BLOCKSIZE_x * BLOCKSIZE_y * BLOCKSIZE_z
 * threads per block and waits for it to finish before returning. Launch and
 * execution errors are passed to gpuErrchk.
 * Does nothing when one of the dimensions is zero or negative, like the CPU version.
 */
void copy_3d_array(float*** source, float*** dest, int dimension1, int dimension2, int dimension3) {
    // Nothing to copy; a zero-sized grid would also be an invalid launch configuration
    if (dimension1 <= 0 || dimension2 <= 0 || dimension3 <= 0) {
        return;
    }

    // Grid axes follow the kernel mapping: x covers dimension3, y dimension2, z dimension1
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
    dim3 gridSize(i_div_up(dimension3, BLOCKSIZE_x), i_div_up(dimension2, BLOCKSIZE_y), i_div_up(dimension1, BLOCKSIZE_z));

    copy_3d_array_kernel<<<gridSize, blockSize>>>(source, dest, dimension1, dimension2, dimension3);

    gpuErrchk( cudaPeekAtLastError() );   // launch errors (bad configuration)
    gpuErrchk( cudaDeviceSynchronize() ); // errors raised while the kernel runs
}
|
||||||
|
#else
|
||||||
|
/*
 * CPU version (built when __CUDACC__ is not defined): copy
 * source[i][j][k] into dest[i][j][k] for every i < dimension1,
 * j < dimension2 and k < dimension3.
 * Nothing is read or written when any dimension is zero or negative.
 */
void copy_3d_array(float*** source, float*** dest, int dimension1, int dimension2, int dimension3) {
    int plane = 0;
    while (plane < dimension1) {
        int row = 0;
        while (row < dimension2) {
            int col = 0;
            while (col < dimension3) {
                const float value = source[plane][row][col];
                dest[plane][row][col] = value;
                col++;
            }
            row++;
        }
        plane++;
    }
}
|
||||||
|
#endif
|
39
src/utils.cu
39
src/utils.cu
@ -11,6 +11,10 @@
|
|||||||
|
|
||||||
#include "include/utils.h"
|
#include "include/utils.h"
|
||||||
|
|
||||||
|
#define BLOCKSIZE_x 16
|
||||||
|
#define BLOCKSIZE_y 8
|
||||||
|
#define BLOCKSIZE_z 8
|
||||||
|
|
||||||
|
|
||||||
int i_div_up(int a, int b) { // Partie entière supérieure de a/b
|
int i_div_up(int a, int b) { // Partie entière supérieure de a/b
|
||||||
return ((a % b) != 0) ? (a / b + 1) : (a / b);
|
return ((a % b) != 0) ? (a / b + 1) : (a / b);
|
||||||
@ -55,3 +59,38 @@ bool check_cuda_compatibility() {
|
|||||||
return false;
|
return false;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __CUDACC__
|
||||||
|
/*
 * Kernel: each thread copies at most one element source[i][j][k] to dest[i][j][k].
 *
 * Thread-to-index mapping:
 *   x -> k, innermost index  (< dimension3)
 *   y -> j                   (< dimension2)
 *   z -> i, outermost index  (< dimension1)
 * threadIdx.x varies fastest inside a warp, so mapping it to the innermost
 * index makes neighbouring threads access neighbouring floats of the same
 * row instead of one float in many different rows.
 *
 * source, dest and every pointer table they reference are dereferenced on
 * the device, so they must all be device-accessible.
 */
__global__ void copy_3d_array_kernel(float*** source, float*** dest, int dimension1, int dimension2, int dimension3) {
    int k = threadIdx.x + blockDim.x*blockIdx.x; // < dimension3
    int j = threadIdx.y + blockDim.y*blockIdx.y; // < dimension2
    int i = threadIdx.z + blockDim.z*blockIdx.z; // < dimension1

    // The grid is rounded up to whole blocks: surplus threads have nothing to copy
    if (i >= dimension1 || j >= dimension2 || k >= dimension3) {
        return;
    }

    dest[i][j][k] = source[i][j][k];
}

/*
 * Copy a dimension1 x dimension2 x dimension3 array from source to dest on the GPU.
 *
 * Launches copy_3d_array_kernel with BLOCKSIZE_x * BLOCKSIZE_y * BLOCKSIZE_z
 * threads per block and waits for it to finish before returning. Launch and
 * execution errors are passed to gpuErrchk.
 * Does nothing when one of the dimensions is zero or negative, like the CPU version.
 */
extern "C"
void copy_3d_array(float*** source, float*** dest, int dimension1, int dimension2, int dimension3) {
    // Nothing to copy; a zero-sized grid would also be an invalid launch configuration
    if (dimension1 <= 0 || dimension2 <= 0 || dimension3 <= 0) {
        return;
    }

    // Grid axes follow the kernel mapping: x covers dimension3, y dimension2, z dimension1
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
    dim3 gridSize(i_div_up(dimension3, BLOCKSIZE_x), i_div_up(dimension2, BLOCKSIZE_y), i_div_up(dimension1, BLOCKSIZE_z));

    copy_3d_array_kernel<<<gridSize, blockSize>>>(source, dest, dimension1, dimension2, dimension3);

    gpuErrchk( cudaPeekAtLastError() );   // launch errors (bad configuration)
    gpuErrchk( cudaDeviceSynchronize() ); // errors raised while the kernel runs
}
|
||||||
|
#else
|
||||||
|
/*
 * Host-only version (built when __CUDACC__ is not defined): copy
 * source[i][j][k] into dest[i][j][k] for every i < dimension1,
 * j < dimension2 and k < dimension3.
 * Nothing is read or written when any dimension is zero or negative.
 */
void copy_3d_array(float*** source, float*** dest, int dimension1, int dimension2, int dimension3) {
    int plane = 0;
    while (plane < dimension1) {
        int row = 0;
        while (row < dimension2) {
            int col = 0;
            while (col < dimension3) {
                const float value = source[plane][row][col];
                dest[plane][row][col] = value;
                col++;
            }
            row++;
        }
        plane++;
    }
}
|
||||||
|
#endif
|
Loading…
Reference in New Issue
Block a user