#include <stdlib.h>
#include <stdio.h>

#ifdef USE_CUDA
    #ifndef __CUDACC__
        #include "cuda_runtime.h"
    #endif
#endif

#include "include/memory_management.h"
#include "include/colors.h"
#include "include/utils.h"

#define BLOCKSIZE_x 16
#define BLOCKSIZE_y 8
#define BLOCKSIZE_z 8

/*
* Ceiling of the integer division a/b: rounds the quotient up whenever b
* does not divide a exactly. Used to size CUDA grids from data dimensions.
*/
int i_div_up(int a, int b) {
    if (a % b == 0) {
        return a / b;
    }
    return a / b + 1;
}
#ifdef __CUDACC__
extern "C"
#endif
/*
* Reports whether a usable CUDA GPU is present.
* Compiled with nvcc: enumerates the devices, prints their properties
* (compute capability, global and shared memory), announces device 0 as the
* one that will be used, and warns if the GPU's shared-memory block size
* differs from the project's MEMORY_BLOCK constant. Returns true when at
* least one device is found.
* Compiled without nvcc: always prints the "no GPU" message and returns false.
*/
bool check_cuda_compatibility() {
#ifdef __CUDACC__
    // Initialize: cudaGetDeviceCount leaves its output untouched on failure,
    // so an unchecked call would let us read an indeterminate value.
    int nDevices = 0;
    cudaDeviceProp prop;

    // Treat an API failure (e.g. no driver installed) the same as
    // "zero devices": fall back to the CPU path.
    if (cudaGetDeviceCount(&nDevices) != cudaSuccess || nDevices == 0) {
        printf("Pas d'utilisation du GPU\n\n");
        return false;
    }

    printf("GPUs disponibles:\n");

    for (int i=0; i < nDevices; i++) {
        cudaGetDeviceProperties(&prop, i);
        printf(" - %s\n\t - Compute Capability: %d.%d\n\t - Memory available: ", prop.name, prop.major, prop.minor);
        printf_memory(prop.totalGlobalMem);
        printf("\n\t - Shared Memory per block: ");
        printf_memory(prop.sharedMemPerBlock);
        printf("\n\n");
    }

    // Device 0 is the one the rest of the code will run on.
    cudaGetDeviceProperties(&prop, 0);
    printf("Utilisation du GPU: " BLUE "%s" RESET "\n\n", prop.name);

    // Mismatch is not fatal, but may waste VRAM (see memory_management.h).
    if (prop.sharedMemPerBlock != MEMORY_BLOCK) {
        printf_warning((char*)"La taille des blocs mémoire du GPU et celle utilisée dans le code diffèrent.\n");
        printf("\tCela peut mener à une utilisation supplémentaire de VRAM.\n");
        printf("\tChanger MEMORY_BLOCK à %ld dans src/include/memory_management.h\n", prop.sharedMemPerBlock);
    }
    return true;
#else
    printf("Pas d'utilisation du GPU\n\n");
    return false;
#endif
}
#ifdef __CUDACC__
/*
* Device kernel: each thread copies exactly one cell of a 3D float array.
* Expects a 3D launch whose total thread extent covers at least
* dimension1 x dimension2 x dimension3; out-of-range threads do nothing.
*/
__global__ void copy_3d_array_kernel(float*** source, float*** dest, int dimension1, int dimension2, int dimension3) {
    // Global coordinates of the cell handled by this thread
    int x = blockIdx.x*blockDim.x + threadIdx.x;
    int y = blockIdx.y*blockDim.y + threadIdx.y;
    int z = blockIdx.z*blockDim.z + threadIdx.z;

    // The grid may overshoot the array bounds: only in-range threads write
    if (x < dimension1 && y < dimension2 && z < dimension3) {
        dest[x][y][z] = source[x][y][z];
    }
}
extern "C"
/*
* GPU version: copies the [dimension1][dimension2][dimension3] float array
* `source` into `dest` (both device pointers) and blocks until the copy
* has completed.
*/
void copy_3d_array(float*** source, float*** dest, int dimension1, int dimension2, int dimension3) {
    // One thread per cell, grid rounded up to cover the whole array
    dim3 block(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
    dim3 grid(i_div_up(dimension1, BLOCKSIZE_x), i_div_up(dimension2, BLOCKSIZE_y), i_div_up(dimension3, BLOCKSIZE_z));

    copy_3d_array_kernel<<<grid, block>>>(source, dest, dimension1, dimension2, dimension3);

    // Surface launch-configuration errors, then wait for (and check)
    // the asynchronous execution itself.
    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
}
#else
/*
* CPU fallback: element-wise copy of the [dimension1][dimension2][dimension3]
* float array `source` into `dest`.
*/
void copy_3d_array(float*** source, float*** dest, int dimension1, int dimension2, int dimension3) {
    for (int a=0; a < dimension1; a++) {
        for (int b=0; b < dimension2; b++) {
            // Hoist the row pointers out of the innermost loop
            float* src_row = source[a][b];
            float* dst_row = dest[a][b];
            for (int c=0; c < dimension3; c++) {
                dst_row[c] = src_row[c];
            }
        }
    }
}
#endif