diff --git a/src/cnn/train.c b/src/cnn/train.c index ad971dd..72cd5a7 100644 --- a/src/cnn/train.c +++ b/src/cnn/train.c @@ -127,7 +127,7 @@ void* train_thread(void* parameters) { void train(int dataset_type, char* images_file, char* labels_file, char* data_dir, int epochs, char* out, char* recover) { #ifdef USE_CUDA - bool compatibility = check_cuda_compatibility(); + bool compatibility = cuda_setup(true); if (!compatibility) { printf("Exiting.\n"); exit(1); diff --git a/src/common/include/utils.h b/src/common/include/utils.h index 0e4603f..8c66828 100644 --- a/src/common/include/utils.h +++ b/src/common/include/utils.h @@ -60,8 +60,10 @@ extern "C" #endif /* * Vérification de la compatibilité CUDA +* spécifier avec "verbose" si il faut afficher +* la carte utilisée notamment */ -bool check_cuda_compatibility(); +bool cuda_setup(bool verbose); #ifdef __CUDACC__ extern "C" diff --git a/src/common/utils.c b/src/common/utils.c index 93b1671..5292dc3 100644 --- a/src/common/utils.c +++ b/src/common/utils.c @@ -1,6 +1,7 @@ #include #include #include + #ifdef USE_CUDA #ifndef __CUDACC__ #include "cuda_runtime.h" @@ -42,39 +43,65 @@ int i_div_up(int a, int b) { // Partie entière supérieure de a/b #ifdef __CUDACC__ extern "C" #endif -bool check_cuda_compatibility() { +bool cuda_setup(bool verbose) { #ifdef __CUDACC__ int nDevices; + int selected_device = 0; + cudaDeviceProp selected_prop; cudaDeviceProp prop; cudaGetDeviceCount(&nDevices); - if (nDevices == 0) { - printf("Pas d'utilisation du GPU\n\n"); + if (nDevices <= 0) { // I've seen weird issues when there is no GPU at all + if (verbose) { + printf("Pas d'utilisation du GPU\n\n"); + } return false; } - printf("GPUs disponibles:\n"); + if (verbose) { + printf("GPUs disponibles:\n"); + } + + cudaGetDeviceProperties(&selected_prop, selected_device); for (int i=0; i < nDevices; i++) { cudaGetDeviceProperties(&prop, i); - printf(" - %s\n\t - Compute Capability: %d.%d\n\t - Memory available: ", prop.name, prop.major, 
prop.minor); - printf_memory(prop.totalGlobalMem); - printf("\n\t - Shared Memory per block: "); - printf_memory(prop.sharedMemPerBlock); - printf("\n\n"); + + if (verbose) { + printf(" - %s\n\t - Compute Capability: %d.%d\n\t - Memory available: ", prop.name, prop.major, prop.minor); + printf_memory(prop.totalGlobalMem); + printf("\n\t - Shared Memory per block: "); + printf_memory(prop.sharedMemPerBlock); + printf("\n\n"); + } + + if (prop.clockRate*prop.multiProcessorCount >= selected_prop.clockRate*selected_prop.multiProcessorCount) { // This criteria approximately matches the best device + selected_prop = prop; + selected_device = i; + } } - cudaGetDeviceProperties(&prop, 0); - printf("Utilisation du GPU: " BLUE "%s" RESET "\n\n", prop.name); + cudaSetDevice(selected_device); // Select the best device for computation + if (verbose) { + printf("Utilisation du GPU: " BLUE "%s" RESET "\n\n", selected_prop.name); + } - if (prop.sharedMemPerBlock != MEMORY_BLOCK) { + if (BLOCKSIZE_x*BLOCKSIZE_y*BLOCKSIZE_z > selected_prop.maxThreadsPerBlock) { + printf_error((char*)"La taille de bloc sélectionnée est trop grande.\n"); + printf("\tMaximum accepté: %d\n", selected_prop.maxThreadsPerBlock); + exit(1); + } + if (selected_prop.sharedMemPerBlock != MEMORY_BLOCK) { // C'est un warning, on l'affiche dans tous les cas printf_warning((char*)"La taille des blocs mémoire du GPU et celle utilisée dans le code diffèrent.\n"); printf("\tCela peut mener à une utilisation supplémentaire de VRAM.\n"); - printf("\tChanger MEMORY_BLOCK à %ld dans src/include/memory_management.h\n", prop.sharedMemPerBlock); + printf("\tChanger MEMORY_BLOCK à %ld dans src/include/memory_management.h\n", selected_prop.sharedMemPerBlock); } return true; #else - printf("Pas d'utilisation du GPU\n\n"); + if (verbose) { + printf("Pas d'utilisation du GPU\n\n"); + } + return false; #endif } diff --git a/src/common/utils.cu b/src/common/utils.cu index 93b1671..5292dc3 100644 --- a/src/common/utils.cu +++
b/src/common/utils.cu @@ -1,6 +1,7 @@ #include #include #include + #ifdef USE_CUDA #ifndef __CUDACC__ #include "cuda_runtime.h" @@ -42,39 +43,65 @@ int i_div_up(int a, int b) { // Partie entière supérieure de a/b #ifdef __CUDACC__ extern "C" #endif -bool check_cuda_compatibility() { +bool cuda_setup(bool verbose) { #ifdef __CUDACC__ int nDevices; + int selected_device = 0; + cudaDeviceProp selected_prop; cudaDeviceProp prop; cudaGetDeviceCount(&nDevices); - if (nDevices == 0) { - printf("Pas d'utilisation du GPU\n\n"); + if (nDevices <= 0) { // I've seen weird issues when there is no GPU at all + if (verbose) { + printf("Pas d'utilisation du GPU\n\n"); + } return false; } - printf("GPUs disponibles:\n"); + if (verbose) { + printf("GPUs disponibles:\n"); + } + + cudaGetDeviceProperties(&selected_prop, selected_device); for (int i=0; i < nDevices; i++) { cudaGetDeviceProperties(&prop, i); - printf(" - %s\n\t - Compute Capability: %d.%d\n\t - Memory available: ", prop.name, prop.major, prop.minor); - printf_memory(prop.totalGlobalMem); - printf("\n\t - Shared Memory per block: "); - printf_memory(prop.sharedMemPerBlock); - printf("\n\n"); + + if (verbose) { + printf(" - %s\n\t - Compute Capability: %d.%d\n\t - Memory available: ", prop.name, prop.major, prop.minor); + printf_memory(prop.totalGlobalMem); + printf("\n\t - Shared Memory per block: "); + printf_memory(prop.sharedMemPerBlock); + printf("\n\n"); + } + + if (prop.clockRate*prop.multiProcessorCount >= selected_prop.clockRate*selected_prop.multiProcessorCount) { // This criteria approximately matches the best device + selected_prop = prop; + selected_device = i; + } } - cudaGetDeviceProperties(&prop, 0); - printf("Utilisation du GPU: " BLUE "%s" RESET "\n\n", prop.name); + cudaSetDevice(selected_device); // Select the best device for computation + if (verbose) { + printf("Utilisation du GPU: " BLUE "%s" RESET "\n\n", selected_prop.name); + } - if (prop.sharedMemPerBlock != MEMORY_BLOCK) { + if 
(BLOCKSIZE_x*BLOCKSIZE_y*BLOCKSIZE_z > selected_prop.maxThreadsPerBlock) { + printf_error((char*)"La taille de bloc sélectionnée est trop grande.\n"); + printf("\tMaximum accepté: %d\n", selected_prop.maxThreadsPerBlock); + exit(1); + } + if (selected_prop.sharedMemPerBlock != MEMORY_BLOCK) { // C'est un warning, on l'affiche dans tous les cas printf_warning((char*)"La taille des blocs mémoire du GPU et celle utilisée dans le code diffèrent.\n"); printf("\tCela peut mener à une utilisation supplémentaire de VRAM.\n"); - printf("\tChanger MEMORY_BLOCK à %ld dans src/include/memory_management.h\n", prop.sharedMemPerBlock); + printf("\tChanger MEMORY_BLOCK à %ld dans src/include/memory_management.h\n", selected_prop.sharedMemPerBlock); } return true; #else - printf("Pas d'utilisation du GPU\n\n"); + if (verbose) { + printf("Pas d'utilisation du GPU\n\n"); + } + return false; #endif } diff --git a/src/scripts/convolution_benchmark.cu b/src/scripts/convolution_benchmark.cu index 70d397a..92accc4 100644 --- a/src/scripts/convolution_benchmark.cu +++ b/src/scripts/convolution_benchmark.cu @@ -217,7 +217,7 @@ int main(int argc, char* argv[]) { /* printf("Checking CUDA compatibility.\n"); - bool cuda_compatible = check_cuda_compatibility(); + bool cuda_compatible = cuda_setup(true); if (!cuda_compatible) { printf(RED "CUDA not compatible, skipping tests.\n" RESET); return 0; diff --git a/test/cnn_convolution.cu b/test/cnn_convolution.cu index d18c5c2..5dcba85 100644 --- a/test/cnn_convolution.cu +++ b/test/cnn_convolution.cu @@ -192,7 +192,7 @@ void run_convolution_test(int input_width, int output_width, int rows, int colum int main() { printf("Checking CUDA compatibility.\n"); - bool cuda_compatible = check_cuda_compatibility(); + bool cuda_compatible = cuda_setup(true); if (!cuda_compatible) { printf(RED "CUDA not compatible, skipping tests.\n" RESET); return 0; diff --git a/test/cnn_function.cu b/test/cnn_function.cu index 4d191f8..9ba01b2 100644 --- a/test/cnn_function.cu +++
b/test/cnn_function.cu @@ -91,7 +91,7 @@ void test1(int activation, bool use_local_kernel) { int main() { printf("Checking CUDA compatibility.\n"); - bool cuda_compatible = check_cuda_compatibility(); + bool cuda_compatible = cuda_setup(true); if (!cuda_compatible) { printf(RED "CUDA not compatible, skipping tests.\n" RESET); return 0; diff --git a/test/cnn_matrix_multiplication.cu b/test/cnn_matrix_multiplication.cu index e00a260..320cb2a 100644 --- a/test/cnn_matrix_multiplication.cu +++ b/test/cnn_matrix_multiplication.cu @@ -127,7 +127,7 @@ void run_matrices_test(int n, int p, int q) { int main() { printf("Checking CUDA compatibility.\n"); - bool cuda_compatible = check_cuda_compatibility(); + bool cuda_compatible = cuda_setup(true); if (!cuda_compatible) { printf(RED "CUDA not compatible, skipping tests.\n" RESET); return 0; diff --git a/test/memory_management.cu b/test/memory_management.cu index a575a55..d9161a7 100644 --- a/test/memory_management.cu +++ b/test/memory_management.cu @@ -18,7 +18,7 @@ __global__ void check_access(int* array, int range) { int main() { printf("Checking CUDA compatibility.\n"); - bool cuda_compatible = check_cuda_compatibility(); + bool cuda_compatible = cuda_setup(true); if (!cuda_compatible) { printf(RED "CUDA not compatible, skipping tests.\n" RESET); return 0;