diff --git a/src/cnn/cnn.c b/src/cnn/cnn.c index fc4894b..9dd2dcb 100644 --- a/src/cnn/cnn.c +++ b/src/cnn/cnn.c @@ -12,6 +12,7 @@ #include "include/make.h" #include "../include/colors.h" +#include "../include/utils.h" #include "include/cnn.h" // Augmente les dimensions de l'image d'entrée @@ -188,7 +189,7 @@ void forward_propagation(Network* network) { */ if (k_i->cnn) { // Convolution make_convolution(k_i->cnn, input, output, output_width); - copy_input_to_input_z(output, output_z, output_depth, output_width, output_width); + copy_3d_array(output, output_z, output_depth, output_width, output_width); apply_function_to_matrix(activation, output, output_depth, output_width); } else if (k_i->nn) { // Full connection @@ -197,7 +198,7 @@ void forward_propagation(Network* network) { } else { // Matrice -> Vecteur make_dense_linearized(k_i->nn, input, output[0][0], input_depth, input_width, output_width); } - copy_input_to_input_z(output, output_z, 1, 1, output_width); + copy_3d_array(output, output_z, 1, 1, output_width); apply_function_to_vector(activation, output, output_width); } else { // Pooling @@ -214,7 +215,7 @@ void forward_propagation(Network* network) { printf("identifiant: %d, position: %d\n", pooling, i); } } - copy_input_to_input_z(output, output_z, output_depth, output_width, output_width); + copy_3d_array(output, output_z, output_depth, output_width, output_width); } } } @@ -281,16 +282,6 @@ void drop_neurones(float*** input, int depth, int dim1, int dim2, int dropout) { } } -void copy_input_to_input_z(float*** output, float*** output_z, int output_depth, int output_rows, int output_columns) { - for (int i=0; i= dimension1 || idy >= dimension2 || idz >= dimension3) { + return; + } + + dest[idx][idy][idz] = source[idx][idy][idz]; +} + +void copy_3d_array(float*** source, float*** dest, int dimension1, int dimension2, int dimension3) { + dim3 gridSize(i_div_up(dimension1, BLOCKSIZE_x), i_div_up(dimension2, BLOCKSIZE_y), i_div_up(dimension3, BLOCKSIZE_z)); + dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); + + copy_3d_array_kernel<<>>(source, dest, dimension1, dimension2, dimension3); + + gpuErrchk( cudaPeekAtLastError() ); + gpuErrchk( cudaDeviceSynchronize() ); +} +#else +void copy_3d_array(float*** source, float*** dest, int dimension1, int dimension2, int dimension3) { + for (int i=0; i < dimension1; i++) { + for (int j=0; j < dimension2; j++) { + for (int k=0; k < dimension3; k++) { + dest[i][j][k] = source[i][j][k]; + } + } + } +} +#endif \ No newline at end of file diff --git a/src/utils.cu b/src/utils.cu index 8681646..bc60c5f 100644 --- a/src/utils.cu +++ b/src/utils.cu @@ -11,6 +11,10 @@ #include "include/utils.h" +#define BLOCKSIZE_x 16 +#define BLOCKSIZE_y 8 +#define BLOCKSIZE_z 8 + int i_div_up(int a, int b) { // Partie entière supérieure de a/b return ((a % b) != 0) ? (a / b + 1) : (a / b); @@ -55,3 +59,38 @@ bool check_cuda_compatibility() { return false; #endif } + +#ifdef __CUDACC__ +__global__ void copy_3d_array_kernel(float*** source, float*** dest, int dimension1, int dimension2, int dimension3) { + int idx = threadIdx.x + blockDim.x*blockIdx.x; // < dimension1 + int idy = threadIdx.y + blockDim.y*blockIdx.y; // < dimension2 + int idz = threadIdx.z + blockDim.z*blockIdx.z; // < dimension3 + + if (idx >= dimension1 || idy >= dimension2 || idz >= dimension3) { + return; + } + + dest[idx][idy][idz] = source[idx][idy][idz]; +} + +extern "C" +void copy_3d_array(float*** source, float*** dest, int dimension1, int dimension2, int dimension3) { + dim3 gridSize(i_div_up(dimension1, BLOCKSIZE_x), i_div_up(dimension2, BLOCKSIZE_y), i_div_up(dimension3, BLOCKSIZE_z)); + dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); + + copy_3d_array_kernel<<>>(source, dest, dimension1, dimension2, dimension3); + + gpuErrchk( cudaPeekAtLastError() ); + gpuErrchk( cudaDeviceSynchronize() ); +} +#else +void copy_3d_array(float*** source, float*** dest, int dimension1, int dimension2, int dimension3) { + for (int i=0; i < dimension1; i++) { + for (int j=0; j < dimension2; j++) { + for (int k=0; k < dimension3; k++) { + dest[i][j][k] = source[i][j][k]; + } + } + } +} +#endif \ No newline at end of file