From 2ee1bc407916762112ff949111f70c5ea9e5178a Mon Sep 17 00:00:00 2001
From: augustin64
Date: Thu, 30 Mar 2023 18:08:13 +0200
Subject: [PATCH] Add reset_3d_array function

---
 src/include/utils.h |  8 ++++++++
 src/utils.c         | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/utils.cu        | 50 ++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/src/include/utils.h b/src/include/utils.h
index 3305c44..b18d59c 100644
--- a/src/include/utils.h
+++ b/src/include/utils.h
@@ -45,4 +45,12 @@ extern "C"
 * Copier des valeurs d'un tableau de dimension 3 de mémoire partagée
 */
 void copy_3d_array(float*** source, float*** dest, int dimension1, int dimension2, int dimension3);
+
+#ifdef __CUDACC__
+extern "C"
+#endif
+/*
+* Set every element dest[i][j][k] of a 3D array to 0 (i < dimension1, j < dimension2, k < dimension3).
+*/
+void reset_3d_array(float*** dest, int dimension1, int dimension2, int dimension3);
 #endif
\ No newline at end of file
diff --git a/src/utils.c b/src/utils.c
index 5cee8ee..f8073eb 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -92,4 +92,53 @@ void copy_3d_array(float*** source, float*** dest, int dimension1, int dimension
         }
     }
 }
+#endif
+
+#ifdef __CUDACC__
+/*
+* Kernel: each thread derives one (idx, idy, idz) index from its block and
+* thread coordinates and writes 0 to dest[idx][idy][idz]. Threads whose index
+* falls outside the given dimensions return without writing.
+*/
+__global__ void reset_3d_array_kernel(float*** dest, int dimension1, int dimension2, int dimension3) {
+    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < dimension1
+    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < dimension2
+    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < dimension3
+
+    if (idx >= dimension1 || idy >= dimension2 || idz >= dimension3) {
+        return;
+    }
+
+    dest[idx][idy][idz] = 0.0f;
+}
+
+/*
+* Launch reset_3d_array_kernel with BLOCKSIZE_x * BLOCKSIZE_y * BLOCKSIZE_z
+* threads per block and i_div_up(dimensionN, BLOCKSIZE) blocks along each axis,
+* then pass the launch and synchronization status to gpuErrchk.
+* NOTE(review): dest must be dereferenceable from device code - confirm at call sites.
+*/
+extern "C"
+void reset_3d_array(float*** dest, int dimension1, int dimension2, int dimension3) {
+    dim3 gridSize(i_div_up(dimension1, BLOCKSIZE_x), i_div_up(dimension2, BLOCKSIZE_y), i_div_up(dimension3, BLOCKSIZE_z));
+    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
+
+    reset_3d_array_kernel<<<gridSize, blockSize>>>(dest, dimension1, dimension2, dimension3);
+
+    gpuErrchk( cudaPeekAtLastError() );
+    gpuErrchk( cudaDeviceSynchronize() );
+}
+#else
+/*
+* CPU version: write 0 to every dest[i][j][k] with three nested loops.
+*/
+void reset_3d_array(float*** dest, int dimension1, int dimension2, int dimension3) {
+    for (int i=0; i < dimension1; i++) {
+        for (int j=0; j < dimension2; j++) {
+            for (int k=0; k < dimension3; k++) {
+                dest[i][j][k] = 0.0f;
+            }
+        }
+    }
+}
 #endif
\ No newline at end of file
diff --git a/src/utils.cu b/src/utils.cu
index bc60c5f..f8073eb 100644
--- a/src/utils.cu
+++ b/src/utils.cu
@@ -73,7 +73,6 @@ __global__ void copy_3d_array_kernel(float*** source, float*** dest, int dimensi
     dest[idx][idy][idz] = source[idx][idy][idz];
 }
 
-extern "C"
 void copy_3d_array(float*** source, float*** dest, int dimension1, int dimension2, int dimension3) {
     dim3 gridSize(i_div_up(dimension1, BLOCKSIZE_x), i_div_up(dimension2, BLOCKSIZE_y), i_div_up(dimension3, BLOCKSIZE_z));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
@@ -93,4 +92,53 @@ void copy_3d_array(float*** source, float*** dest, int dimension1, int dimension
         }
     }
 }
+#endif
+
+#ifdef __CUDACC__
+/*
+* Kernel: each thread derives one (idx, idy, idz) index from its block and
+* thread coordinates and writes 0 to dest[idx][idy][idz]. Threads whose index
+* falls outside the given dimensions return without writing.
+*/
+__global__ void reset_3d_array_kernel(float*** dest, int dimension1, int dimension2, int dimension3) {
+    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < dimension1
+    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < dimension2
+    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < dimension3
+
+    if (idx >= dimension1 || idy >= dimension2 || idz >= dimension3) {
+        return;
+    }
+
+    dest[idx][idy][idz] = 0.0f;
+}
+
+/*
+* Launch reset_3d_array_kernel with BLOCKSIZE_x * BLOCKSIZE_y * BLOCKSIZE_z
+* threads per block and i_div_up(dimensionN, BLOCKSIZE) blocks along each axis,
+* then pass the launch and synchronization status to gpuErrchk.
+* NOTE(review): dest must be dereferenceable from device code - confirm at call sites.
+*/
+extern "C"
+void reset_3d_array(float*** dest, int dimension1, int dimension2, int dimension3) {
+    dim3 gridSize(i_div_up(dimension1, BLOCKSIZE_x), i_div_up(dimension2, BLOCKSIZE_y), i_div_up(dimension3, BLOCKSIZE_z));
+    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
+
+    reset_3d_array_kernel<<<gridSize, blockSize>>>(dest, dimension1, dimension2, dimension3);
+
+    gpuErrchk( cudaPeekAtLastError() );
+    gpuErrchk( cudaDeviceSynchronize() );
+}
+#else
+/*
+* CPU version: write 0 to every dest[i][j][k] with three nested loops.
+*/
+void reset_3d_array(float*** dest, int dimension1, int dimension2, int dimension3) {
+    for (int i=0; i < dimension1; i++) {
+        for (int j=0; j < dimension2; j++) {
+            for (int k=0; k < dimension3; k++) {
+                dest[i][j][k] = 0.0f;
+            }
+        }
+    }
+}
 #endif
\ No newline at end of file