mirror of
https://github.com/augustin64/projet-tipe
synced 2025-01-23 23:26:25 +01:00
utils:cuda: Select best available GPU
This commit is contained in:
parent
e1617a72a8
commit
dbd5362d7d
@ -127,7 +127,7 @@ void* train_thread(void* parameters) {
|
|||||||
|
|
||||||
void train(int dataset_type, char* images_file, char* labels_file, char* data_dir, int epochs, char* out, char* recover) {
|
void train(int dataset_type, char* images_file, char* labels_file, char* data_dir, int epochs, char* out, char* recover) {
|
||||||
#ifdef USE_CUDA
|
#ifdef USE_CUDA
|
||||||
bool compatibility = check_cuda_compatibility();
|
bool compatibility = cuda_setup(true);
|
||||||
if (!compatibility) {
|
if (!compatibility) {
|
||||||
printf("Exiting.\n");
|
printf("Exiting.\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
|
@ -60,8 +60,10 @@ extern "C"
|
|||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* Vérification de la compatibilité CUDA
|
* Vérification de la compatibilité CUDA
|
||||||
|
* spécifier avec "verbose" s'il faut afficher
|
||||||
|
* la carte utilisée notamment
|
||||||
*/
|
*/
|
||||||
bool check_cuda_compatibility();
|
bool cuda_setup(bool verbose);
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
extern "C"
|
extern "C"
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#ifdef USE_CUDA
|
#ifdef USE_CUDA
|
||||||
#ifndef __CUDACC__
|
#ifndef __CUDACC__
|
||||||
#include "cuda_runtime.h"
|
#include "cuda_runtime.h"
|
||||||
@ -42,21 +43,31 @@ int i_div_up(int a, int b) { // Partie entière supérieure de a/b
|
|||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
extern "C"
|
extern "C"
|
||||||
#endif
|
#endif
|
||||||
bool check_cuda_compatibility() {
|
bool cuda_setup(bool verbose) {
|
||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
int nDevices;
|
int nDevices;
|
||||||
|
int selected_device = 0;
|
||||||
|
cudaDeviceProp selected_prop;
|
||||||
cudaDeviceProp prop;
|
cudaDeviceProp prop;
|
||||||
|
|
||||||
cudaGetDeviceCount(&nDevices);
|
cudaGetDeviceCount(&nDevices);
|
||||||
if (nDevices == 0) {
|
if (nDevices <= 0) { // I've seen weird issues when there is no GPU at all
|
||||||
|
if (verbose) {
|
||||||
printf("Pas d'utilisation du GPU\n\n");
|
printf("Pas d'utilisation du GPU\n\n");
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (verbose) {
|
||||||
printf("GPUs disponibles:\n");
|
printf("GPUs disponibles:\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
cudaGetDeviceProperties(&selected_prop, selected_device);
|
||||||
|
|
||||||
for (int i=0; i < nDevices; i++) {
|
for (int i=0; i < nDevices; i++) {
|
||||||
cudaGetDeviceProperties(&prop, i);
|
cudaGetDeviceProperties(&prop, i);
|
||||||
|
|
||||||
|
if (verbose) {
|
||||||
printf(" - %s\n\t - Compute Capability: %d.%d\n\t - Memory available: ", prop.name, prop.major, prop.minor);
|
printf(" - %s\n\t - Compute Capability: %d.%d\n\t - Memory available: ", prop.name, prop.major, prop.minor);
|
||||||
printf_memory(prop.totalGlobalMem);
|
printf_memory(prop.totalGlobalMem);
|
||||||
printf("\n\t - Shared Memory per block: ");
|
printf("\n\t - Shared Memory per block: ");
|
||||||
@ -64,17 +75,33 @@ bool check_cuda_compatibility() {
|
|||||||
printf("\n\n");
|
printf("\n\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaGetDeviceProperties(&prop, 0);
|
if (prop.clockRate*prop.multiProcessorCount >= selected_prop.clockRate*selected_prop.multiProcessorCount) { // Heuristic: this criterion (clock rate times multiprocessor count) approximately identifies the best device
|
||||||
printf("Utilisation du GPU: " BLUE "%s" RESET "\n\n", prop.name);
|
selected_prop = prop;
|
||||||
|
selected_device = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (prop.sharedMemPerBlock != MEMORY_BLOCK) {
|
cudaSetDevice(selected_device); // Select the best device for computation
|
||||||
|
if (verbose) {
|
||||||
|
printf("Utilisation du GPU: " BLUE "%s" RESET "\n\n", selected_prop.name);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (BLOCKSIZE_x*BLOCKSIZE_y*BLOCKSIZE_z > prop.maxThreadsPerBlock) {
|
||||||
|
printf_error((char*)"La taille de bloc sélectionnée est trop grande.\n");
|
||||||
|
printf("\tMaximum accepté: %d\n", selected_prop.maxThreadsPerBlock);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (selected_prop.sharedMemPerBlock != MEMORY_BLOCK) { // C'est un warning, on l'affiche dans tous les cas
|
||||||
printf_warning((char*)"La taille des blocs mémoire du GPU et celle utilisée dans le code diffèrent.\n");
|
printf_warning((char*)"La taille des blocs mémoire du GPU et celle utilisée dans le code diffèrent.\n");
|
||||||
printf("\tCela peut mener à une utilisation supplémentaire de VRAM.\n");
|
printf("\tCela peut mener à une utilisation supplémentaire de VRAM.\n");
|
||||||
printf("\tChanger MEMORY_BLOCK à %ld dans src/include/memory_management.h\n", prop.sharedMemPerBlock);
|
printf("\tChanger MEMORY_BLOCK à %ld dans src/include/memory_management.h\n", selected_prop.sharedMemPerBlock);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
#else
|
#else
|
||||||
|
if (verbose) {
|
||||||
printf("Pas d'utilisation du GPU\n\n");
|
printf("Pas d'utilisation du GPU\n\n");
|
||||||
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#ifdef USE_CUDA
|
#ifdef USE_CUDA
|
||||||
#ifndef __CUDACC__
|
#ifndef __CUDACC__
|
||||||
#include "cuda_runtime.h"
|
#include "cuda_runtime.h"
|
||||||
@ -42,21 +43,31 @@ int i_div_up(int a, int b) { // Partie entière supérieure de a/b
|
|||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
extern "C"
|
extern "C"
|
||||||
#endif
|
#endif
|
||||||
bool check_cuda_compatibility() {
|
bool cuda_setup(bool verbose) {
|
||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
int nDevices;
|
int nDevices;
|
||||||
|
int selected_device = 0;
|
||||||
|
cudaDeviceProp selected_prop;
|
||||||
cudaDeviceProp prop;
|
cudaDeviceProp prop;
|
||||||
|
|
||||||
cudaGetDeviceCount(&nDevices);
|
cudaGetDeviceCount(&nDevices);
|
||||||
if (nDevices == 0) {
|
if (nDevices <= 0) { // I've seen weird issues when there is no GPU at all
|
||||||
|
if (verbose) {
|
||||||
printf("Pas d'utilisation du GPU\n\n");
|
printf("Pas d'utilisation du GPU\n\n");
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (verbose) {
|
||||||
printf("GPUs disponibles:\n");
|
printf("GPUs disponibles:\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
cudaGetDeviceProperties(&selected_prop, selected_device);
|
||||||
|
|
||||||
for (int i=0; i < nDevices; i++) {
|
for (int i=0; i < nDevices; i++) {
|
||||||
cudaGetDeviceProperties(&prop, i);
|
cudaGetDeviceProperties(&prop, i);
|
||||||
|
|
||||||
|
if (verbose) {
|
||||||
printf(" - %s\n\t - Compute Capability: %d.%d\n\t - Memory available: ", prop.name, prop.major, prop.minor);
|
printf(" - %s\n\t - Compute Capability: %d.%d\n\t - Memory available: ", prop.name, prop.major, prop.minor);
|
||||||
printf_memory(prop.totalGlobalMem);
|
printf_memory(prop.totalGlobalMem);
|
||||||
printf("\n\t - Shared Memory per block: ");
|
printf("\n\t - Shared Memory per block: ");
|
||||||
@ -64,17 +75,33 @@ bool check_cuda_compatibility() {
|
|||||||
printf("\n\n");
|
printf("\n\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaGetDeviceProperties(&prop, 0);
|
if (prop.clockRate*prop.multiProcessorCount >= selected_prop.clockRate*selected_prop.multiProcessorCount) { // Heuristic: this criterion (clock rate times multiprocessor count) approximately identifies the best device
|
||||||
printf("Utilisation du GPU: " BLUE "%s" RESET "\n\n", prop.name);
|
selected_prop = prop;
|
||||||
|
selected_device = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (prop.sharedMemPerBlock != MEMORY_BLOCK) {
|
cudaSetDevice(selected_device); // Select the best device for computation
|
||||||
|
if (verbose) {
|
||||||
|
printf("Utilisation du GPU: " BLUE "%s" RESET "\n\n", selected_prop.name);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (BLOCKSIZE_x*BLOCKSIZE_y*BLOCKSIZE_z > prop.maxThreadsPerBlock) {
|
||||||
|
printf_error((char*)"La taille de bloc sélectionnée est trop grande.\n");
|
||||||
|
printf("\tMaximum accepté: %d\n", selected_prop.maxThreadsPerBlock);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (selected_prop.sharedMemPerBlock != MEMORY_BLOCK) { // C'est un warning, on l'affiche dans tous les cas
|
||||||
printf_warning((char*)"La taille des blocs mémoire du GPU et celle utilisée dans le code diffèrent.\n");
|
printf_warning((char*)"La taille des blocs mémoire du GPU et celle utilisée dans le code diffèrent.\n");
|
||||||
printf("\tCela peut mener à une utilisation supplémentaire de VRAM.\n");
|
printf("\tCela peut mener à une utilisation supplémentaire de VRAM.\n");
|
||||||
printf("\tChanger MEMORY_BLOCK à %ld dans src/include/memory_management.h\n", prop.sharedMemPerBlock);
|
printf("\tChanger MEMORY_BLOCK à %ld dans src/include/memory_management.h\n", selected_prop.sharedMemPerBlock);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
#else
|
#else
|
||||||
|
if (verbose) {
|
||||||
printf("Pas d'utilisation du GPU\n\n");
|
printf("Pas d'utilisation du GPU\n\n");
|
||||||
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -217,7 +217,7 @@ int main(int argc, char* argv[]) {
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
printf("Checking CUDA compatibility.\n");
|
printf("Checking CUDA compatibility.\n");
|
||||||
bool cuda_compatible = check_cuda_compatibility();
|
bool cuda_compatible = cuda_setup(true);
|
||||||
if (!cuda_compatible) {
|
if (!cuda_compatible) {
|
||||||
printf(RED "CUDA not compatible, skipping tests.\n" RESET);
|
printf(RED "CUDA not compatible, skipping tests.\n" RESET);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -192,7 +192,7 @@ void run_convolution_test(int input_width, int output_width, int rows, int colum
|
|||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
printf("Checking CUDA compatibility.\n");
|
printf("Checking CUDA compatibility.\n");
|
||||||
bool cuda_compatible = check_cuda_compatibility();
|
bool cuda_compatible = cuda_setup(true);
|
||||||
if (!cuda_compatible) {
|
if (!cuda_compatible) {
|
||||||
printf(RED "CUDA not compatible, skipping tests.\n" RESET);
|
printf(RED "CUDA not compatible, skipping tests.\n" RESET);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -91,7 +91,7 @@ void test1(int activation, bool use_local_kernel) {
|
|||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
printf("Checking CUDA compatibility.\n");
|
printf("Checking CUDA compatibility.\n");
|
||||||
bool cuda_compatible = check_cuda_compatibility();
|
bool cuda_compatible = cuda_setup(true);
|
||||||
if (!cuda_compatible) {
|
if (!cuda_compatible) {
|
||||||
printf(RED "CUDA not compatible, skipping tests.\n" RESET);
|
printf(RED "CUDA not compatible, skipping tests.\n" RESET);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -127,7 +127,7 @@ void run_matrices_test(int n, int p, int q) {
|
|||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
printf("Checking CUDA compatibility.\n");
|
printf("Checking CUDA compatibility.\n");
|
||||||
bool cuda_compatible = check_cuda_compatibility();
|
bool cuda_compatible = cuda_setup(true);
|
||||||
if (!cuda_compatible) {
|
if (!cuda_compatible) {
|
||||||
printf(RED "CUDA not compatible, skipping tests.\n" RESET);
|
printf(RED "CUDA not compatible, skipping tests.\n" RESET);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -18,7 +18,7 @@ __global__ void check_access(int* array, int range) {
|
|||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
printf("Checking CUDA compatibility.\n");
|
printf("Checking CUDA compatibility.\n");
|
||||||
bool cuda_compatible = check_cuda_compatibility();
|
bool cuda_compatible = cuda_setup(true);
|
||||||
if (!cuda_compatible) {
|
if (!cuda_compatible) {
|
||||||
printf(RED "CUDA not compatible, skipping tests.\n" RESET);
|
printf(RED "CUDA not compatible, skipping tests.\n" RESET);
|
||||||
return 0;
|
return 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user