diff --git a/Makefile b/Makefile index b3e038b..0d2e36d 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,7 @@ LD_CFLAGS = -lm -lpthread -ljpeg -fopenmp LD_NVCCFLAGS = -ljpeg -Xcompiler -fopenmp # Compilation flag -CFLAGS = -Wall -Wextra -std=gnu99 -g +CFLAGS = -Wall -Wextra -std=gnu99 -g -O3 NVCCFLAGS = -g # Remove warnings about unused variables, functions, ... # -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable @@ -87,6 +87,7 @@ $(BUILDDIR)/cnn-main: $(CNN_SRCDIR)/main.c \ $(BUILDDIR)/cnn_jpeg.o \ $(BUILDDIR)/cnn_convolution.o \ $(BUILDDIR)/cnn_backpropagation.o \ + $(BUILDDIR)/memory_management.o \ $(BUILDDIR)/colors.o \ $(BUILDDIR)/mnist.o \ $(BUILDDIR)/utils.o @@ -109,6 +110,7 @@ $(BUILDDIR)/cnn-main-cuda: $(BUILDDIR)/cnn_main.cuda.o \ $(BUILDDIR)/cnn_cuda_convolution.o \ $(BUILDDIR)/cnn_backpropagation.cuda.o \ $(BUILDDIR)/colors.cuda.o \ + $(BUILDDIR)/cuda_memory_management.o \ $(BUILDDIR)/mnist.cuda.o \ $(BUILDDIR)/cuda_utils.o $(NVCC) $(LD_NVCCFLAGS) $(NVCCFLAGS) $^ -o $@ @@ -120,7 +122,7 @@ endif $(BUILDDIR)/cnn-preview: $(CNN_SRCDIR)/preview.c $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/colors.o $(BUILDDIR)/utils.o $(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS) -$(BUILDDIR)/cnn-export: $(CNN_SRCDIR)/export.c $(BUILDDIR)/cnn_free.o $(BUILDDIR)/cnn_neuron_io.o $(BUILDDIR)/utils.o +$(BUILDDIR)/cnn-export: $(CNN_SRCDIR)/export.c $(BUILDDIR)/cnn_free.o $(BUILDDIR)/cnn_neuron_io.o $(BUILDDIR)/utils.o $(BUILDDIR)/memory_management.o $(BUILDDIR)/colors.o $(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS) $(BUILDDIR)/cnn_%.o: $(CNN_SRCDIR)/%.c $(CNN_SRCDIR)/include/%.h @@ -166,19 +168,26 @@ prepare-tests: @rm -f $(BUILDDIR)/test-* -build/test-cnn_%: $(TEST_SRCDIR)/cnn_%.c $(CNN_OBJ) $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o $(BUILDDIR)/utils.o +build/test-cnn_%: $(TEST_SRCDIR)/cnn_%.c $(CNN_OBJ) $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o $(BUILDDIR)/utils.o $(BUILDDIR)/memory_management.o $(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS) # mnist.o est déjà inclus en tant que mnist_mnist.o build/test-mnist_%: $(TEST_SRCDIR)/mnist_%.c $(MNIST_OBJ) $(BUILDDIR)/colors.o $(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS) +build/test-memory_management: $(TEST_SRCDIR)/memory_management.c $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o $(BUILDDIR)/utils.o $(BUILDDIR)/test_memory_management.o + $(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS) + +$(BUILDDIR)/test_memory_management.o: $(SRCDIR)/memory_management.c $(SRCDIR)/include/memory_management.h + $(CC) -c $< -o $@ $(CFLAGS) -DTEST_MEMORY_MANAGEMENT + ifdef NVCC_INSTALLED $(BUILDDIR)/test-cnn_%: $(TEST_SRCDIR)/cnn_%.cu \ $(BUILDDIR)/cnn_cuda_%.o \ $(BUILDDIR)/cuda_utils.o \ $(BUILDDIR)/colors.o \ - $(BUILDDIR)/mnist.cuda.o + $(BUILDDIR)/mnist.cuda.o \ + $(BUILDDIR)/cuda_memory_management.o $(NVCC) $(LD_NVCCFLAGS) $(NVCCFLAGS) $^ -o $@ else $(BUILDDIR)/test-cnn_%: $(TEST_SRCDIR)/cnn_%.cu diff --git a/src/cnn/creation.c b/src/cnn/creation.c index 2c261ad..4cdb41a 100644 --- a/src/cnn/creation.c +++ b/src/cnn/creation.c @@ -1,6 +1,7 @@ #include #include +#include "../include/memory_management.h" #include "include/initialisation.h" #include "include/function.h" #include "../include/utils.h" diff --git a/src/cnn/free.c b/src/cnn/free.c index 9595812..e0ce7bf 100644 --- a/src/cnn/free.c +++ b/src/cnn/free.c @@ -1,7 +1,7 @@ #include #include -#include "../include/utils.h" +#include "../include/memory_management.h" #include "include/free.h" @@ -103,8 +103,9 @@ void free_dense_linearisation(Network* network, int pos) { void free_network_creation(Network* network) { free_a_cube_input_layer(network, 0, network->depth[0], network->width[0]); - for (int i=0; i < network->max_size-1; i++) + for (int i=0; i < network->max_size-1; i++) { gree(network->kernel[i]); + } gree(network->width); gree(network->depth); gree(network->kernel); diff --git a/src/cnn/include/struct.h b/src/cnn/include/struct.h index c29d1a4..1f314d8 100644 --- a/src/cnn/include/struct.h +++ b/src/cnn/include/struct.h @@ -39,7 +39,7 @@ typedef struct Network{ int* depth; // depth[size] Kernel** kernel; // kernel[size], contient tous les kernels float**** input; // Tableau de toutes les couches du réseau input[size][couche->depth][couche->width][couche->width] - float**** input_z; // Même tableau que input mais ne contient paas la dernière fonction d'activation à chaque ligne + float**** input_z; // Même tableau que input mais ne contient pas la dernière fonction d'activation à chaque ligne } Network; #endif \ No newline at end of file diff --git a/src/cnn/jpeg.c b/src/cnn/jpeg.c index 6df0f98..cbcf7e7 100644 --- a/src/cnn/jpeg.c +++ b/src/cnn/jpeg.c @@ -53,9 +53,9 @@ imgRawImage* loadJpegImageFile(char* lpFilename) { #endif dwBufferBytes = imgWidth * imgHeight * 3; /* We only read RGB, not A */ - lpData = (unsigned char*)nalloc(sizeof(unsigned char)*dwBufferBytes); + lpData = (unsigned char*)malloc(sizeof(unsigned char)*dwBufferBytes); - lpNewImage = (imgRawImage*)nalloc(sizeof(imgRawImage)); + lpNewImage = (imgRawImage*)malloc(sizeof(imgRawImage)); lpNewImage->numComponents = numComponents; lpNewImage->width = imgWidth; lpNewImage->height = imgHeight; @@ -75,7 +75,7 @@ imgRawImage* loadJpegImageFile(char* lpFilename) { } jpegDataset* loadJpegDataset(char* folderPath) { - jpegDataset* dataset = (jpegDataset*)nalloc(sizeof(jpegDataset)); + jpegDataset* dataset = (jpegDataset*)malloc(sizeof(jpegDataset)); imgRawImage* image; // We start by counting the number of images and categories @@ -83,8 +83,8 @@ jpegDataset* loadJpegDataset(char* folderPath) { dataset->numImages = countFiles(folderPath); dataset->images = NULL; - dataset->labels = (unsigned int*)nalloc(sizeof(unsigned int)*dataset->numImages); - dataset->fileNames = (char**)nalloc(sizeof(char*)*dataset->numImages); + dataset->labels = (unsigned int*)malloc(sizeof(unsigned int)*dataset->numImages); + dataset->fileNames = (char**)malloc(sizeof(char*)*dataset->numImages); DIR* dirp; struct dirent* entry; @@ -97,17 +97,17 @@ jpegDataset* loadJpegDataset(char* folderPath) { if (strcmp(entry->d_name, ".")&&strcmp(entry->d_name, "..")) { if (entry->d_type == DT_DIR) { prev_index = index; - concatenated_path = nalloc(strlen(folderPath)+strlen(entry->d_name)+2); + concatenated_path = malloc(strlen(folderPath)+strlen(entry->d_name)+2); sprintf(concatenated_path, "%s/%s", folderPath, entry->d_name); addFilenamesToArray(concatenated_path, dataset->fileNames, &index); for (int i=prev_index; i < index; i++) { dataset->labels[i] = getLabel(entry->d_name); } - gree(concatenated_path); + free(concatenated_path); } } } - dataset->images = (unsigned char**)nalloc(sizeof(unsigned char*)*dataset->numImages); + dataset->images = (unsigned char**)malloc(sizeof(unsigned char*)*dataset->numImages); for (int i=0; i < (int)dataset->numImages; i++) { dataset->images[i] = NULL; #ifdef STORE_IMAGES_TO_RAM @@ -117,7 +117,7 @@ jpegDataset* loadJpegDataset(char* folderPath) { } image = loadJpegImageFile(dataset->fileNames[i]); dataset->images[i] = image->lpData; - gree(image); + free(image); #endif } #ifdef STORE_IMAGES_TO_RAM @@ -130,8 +130,8 @@ jpegDataset* loadJpegDataset(char* folderPath) { dataset->height = image->height; dataset->numComponents = image->numComponents; - gree(image->lpData); - gree(image); + free(image->lpData); + free(image); closedir(dirp); return dataset; @@ -185,7 +185,7 @@ void addFilenamesToArray(char* path, char** array, int* index) { dirp = opendir(path); /* There should be error handling after this */ while ((entry = readdir(dirp)) != NULL) { if (entry->d_type == DT_REG) { /* If the entry is a regular file */ - filename = (char*)nalloc(strlen(path)+strlen(entry->d_name)+2); + filename = (char*)malloc(strlen(path)+strlen(entry->d_name)+2); sprintf(filename, "%s/%s", path, entry->d_name); array[i] = filename; i++; @@ -197,15 +197,15 @@ void addFilenamesToArray(char* path, char** array, int* index) { void free_dataset(jpegDataset* dataset) { for (int i=0; i < (int)dataset->numImages; i++) { - gree(dataset->fileNames[i]); + free(dataset->fileNames[i]); #ifdef STORE_IMAGES_TO_RAM - gree(dataset->images[i]); + free(dataset->images[i]); #endif } - gree(dataset->fileNames); - gree(dataset->labels); - gree(dataset->images); - gree(dataset); + free(dataset->fileNames); + free(dataset->labels); + free(dataset->images); + free(dataset); } unsigned int getLabel(char* string) { diff --git a/src/cnn/neuron_io.c b/src/cnn/neuron_io.c index f42a605..987e19b 100644 --- a/src/cnn/neuron_io.c +++ b/src/cnn/neuron_io.c @@ -3,8 +3,8 @@ #include #include +#include "../include/memory_management.h" #include "../include/colors.h" -#include "../include/utils.h" #include "include/function.h" #include "include/struct.h" diff --git a/src/cnn/preview.c b/src/cnn/preview.c index 2256eae..a4a120e 100644 --- a/src/cnn/preview.c +++ b/src/cnn/preview.c @@ -38,11 +38,11 @@ void preview_images(char* path, int limit) { if (!dataset->images[i]) { image = loadJpegImageFile(dataset->fileNames[i]); dataset->images[i] = image->lpData; - gree(image); + free(image); } print_image(dataset->images[i], dataset->height, dataset->width); - gree(dataset->images[i]); + free(dataset->images[i]); } } diff --git a/src/cnn/test_network.c b/src/cnn/test_network.c index 8987797..34e9960 100644 --- a/src/cnn/test_network.c +++ b/src/cnn/test_network.c @@ -3,9 +3,9 @@ #include #include +#include "../include/memory_management.h" #include "../mnist/include/mnist.h" #include "include/neuron_io.h" -#include "../include/utils.h" #include "include/struct.h" #include "include/jpeg.h" #include "include/free.h" diff --git a/src/cnn/train.c b/src/cnn/train.c index 4fcc00b..a7a3e49 100644 --- a/src/cnn/train.c +++ b/src/cnn/train.c @@ -7,6 +7,7 @@ #include #include +#include "../include/memory_management.h" #include "../mnist/include/mnist.h" #include "include/initialisation.h" #include "include/neuron_io.h" @@ -131,7 +132,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di // Chargement des images du set de données MNIST int* parameters = read_mnist_images_parameters(images_file); nb_images_total = parameters[0]; - gree(parameters); + free(parameters); images = read_mnist_images(images_file); labels = read_mnist_labels(labels_file); @@ -199,7 +200,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di // thread dans l'hypothèse ou le multi-threading n'est pas utilisé. // Cela est utile à des fins de débogage notamment, // où l'utilisation de threads rend vite les choses plus compliquées qu'elles ne le sont. - TrainParameters* train_params = (TrainParameters*)nalloc(sizeof(TrainParameters)); + TrainParameters* train_params = (TrainParameters*)malloc(sizeof(TrainParameters)); train_params->network = network; train_params->dataset_type = dataset_type; diff --git a/src/cnn/utils.c b/src/cnn/utils.c index 3815084..7a1031f 100644 --- a/src/cnn/utils.c +++ b/src/cnn/utils.c @@ -3,8 +3,8 @@ #include #include +#include "../include/memory_management.h" #include "../include/colors.h" -#include "../include/utils.h" #include "include/struct.h" #define copyVar(var) network_cp->var = network->var diff --git a/src/include/memory_management.h b/src/include/memory_management.h new file mode 100644 index 0000000..e128a33 --- /dev/null +++ b/src/include/memory_management.h @@ -0,0 +1,52 @@ +#include +#include + +#ifndef DEF_MEM_MANAGEMENT_H +#define DEF_MEM_MANAGEMENT_H + +// A block of memory is 48kB +// https://forums.developer.nvidia.com/t/find-the-limit-of-shared-memory-that-can-be-used-per-block/48556 +#define MEMORY_BLOCK 49152 + + +// We define our memory with a linked list of memory blocks +typedef struct Memory { + void* start; // Start of the allocated memory + void* cursor; // Current cursor + size_t size; // Taille de la mémoire allouée + int nb_alloc; // Nombre d'allocations dans le bloc + struct Memory* next; // Élément suivant +} Memory; + +// Renvoie le nombre d'allocations totales dans la mémoire +int get_memory_distinct_allocations(); + +// Fonction récursive correspondante +int get_distinct_allocations(Memory* mem); + +// Renvoie le nombre d'éléments dans la liste chaînée représentant la mémoire +int get_memory_blocks_number(); + +// Renvoie la taille d'une liste chaînée +int get_length(Memory* mem); + +// Créer un bloc de mémoire de taille size +Memory* create_memory_block(size_t size); + +// Allouer un élément de taille size dans mem +void* allocate_memory(size_t size, Memory* mem); + +// Essayer de libérer le pointeur représenté par ptr dans mem +Memory* free_memory(void* ptr, Memory* mem); + +#ifdef __CUDACC__ +extern "C" +#endif +void* nalloc(size_t sz); + +#ifdef __CUDACC__ +extern "C" +#endif +void gree(void* ptr); + +#endif \ No newline at end of file diff --git a/src/include/utils.h b/src/include/utils.h index 96a78b4..646a2db 100644 --- a/src/include/utils.h +++ b/src/include/utils.h @@ -38,14 +38,4 @@ extern "C" #endif bool check_cuda_compatibility(); -#ifdef __CUDACC__ -extern "C" -#endif -void* nalloc(size_t sz); - -#ifdef __CUDACC__ -extern "C" -#endif -void gree(void* ptr); - #endif \ No newline at end of file diff --git a/src/memory_management.c b/src/memory_management.c new file mode 100644 index 0000000..71c1248 --- /dev/null +++ b/src/memory_management.c @@ -0,0 +1,132 @@ +#include +#include +#include +#include + +#include "include/memory_management.h" +#include "include/colors.h" + + +Memory* memory = NULL; +pthread_mutex_t memory_lock = PTHREAD_MUTEX_INITIALIZER; + + +int get_distinct_allocations(Memory* mem) { + if (!mem) { + return 0; + } + return mem->nb_alloc + get_distinct_allocations(mem->next); +} + + +int get_length(Memory* mem) { + if (!mem) { + return 0; + } + return 1 + get_distinct_allocations(mem->next); +} + + +int get_memory_distinct_allocations() { + return get_distinct_allocations(memory); +} + +int get_memory_blocks_number() { + return get_length(memory); +} + + +Memory* create_memory_block(size_t size) { + Memory* mem = (Memory*)malloc(sizeof(Memory)); + #ifdef __CUDACC__ + cudaMallocManaged(&(mem->start), size, cudaMemAttachHost); + #else + mem->start = malloc(size); + #endif + mem->cursor = mem->start; + mem->size = size; + mem->nb_alloc = 0; + mem->next = NULL; + + return mem; +} + + +void* allocate_memory(size_t size, Memory* mem) { + // Si il y a suffisamment de mémoire disponible + if (mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start) >= size) { + void* ptr = mem->cursor; + mem->cursor = (void*)((intptr_t)mem->cursor + size); // On décale le curseur de la taille allouée + mem->nb_alloc++; + return ptr; + } else { + printf("Mémoire disponible: %ld. Nécessaire: %ld\n", mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start), size); + // Sinon on continue sur l'élément suivant de la liste + if (!mem->next) { + mem->next = create_memory_block(MEMORY_BLOCK < size ? size : MEMORY_BLOCK); + } + return allocate_memory(size, mem->next); + } +} + + +Memory* free_memory(void* ptr, Memory* mem) { + if (!mem) { + printf("[ERREUR] Le pointeur %p a déjà été libéré ou n'a jamais été alloué\n", ptr); + return mem; + } + if ((intptr_t)mem->start <= (intptr_t)ptr && (intptr_t)ptr <= (intptr_t)mem->start + (intptr_t)mem->size) { + mem->nb_alloc--; + if (mem->nb_alloc == 0) { + Memory* mem_next = mem->next; + #ifdef __CUDACC__ + cudaFree(mem->start); + #else + free(mem->start); + #endif + free(mem); + return mem_next; + } else { + return mem; + } + } else { + mem->next = free_memory(ptr, mem->next); + return mem; + } +} + + +#ifdef __CUDACC__ +extern "C" +#endif +void* nalloc(size_t sz) { + #if defined(USE_CUDA) || defined(TEST_MEMORY_MANAGEMENT) + pthread_mutex_lock(&memory_lock); + if (!memory) { + // We allocate a new memory block + memory = create_memory_block(MEMORY_BLOCK < sz ? sz : MEMORY_BLOCK); + } + printf("Distinct allocations: %d Blocks: %d\n", get_distinct_allocations(memory), get_length(memory)); + printf("Requested memory of size %ld\n", sz); + void* ptr = allocate_memory(sz, memory); + + pthread_mutex_unlock(&memory_lock); + return ptr; + #else + void* ptr = malloc(sz); + return ptr; + #endif +} + +#ifdef __CUDACC__ +extern "C" +#endif +void gree(void* ptr) { + #if defined(USE_CUDA) || defined(TEST_MEMORY_MANAGEMENT) + pthread_mutex_lock(&memory_lock); + free_memory(ptr, memory); + pthread_mutex_unlock(&memory_lock); + #else + free(ptr); + #endif +} \ No newline at end of file diff --git a/src/memory_management.cu b/src/memory_management.cu new file mode 100644 index 0000000..71c1248 --- /dev/null +++ b/src/memory_management.cu @@ -0,0 +1,132 @@ +#include +#include +#include +#include + +#include "include/memory_management.h" +#include "include/colors.h" + + +Memory* memory = NULL; +pthread_mutex_t memory_lock = PTHREAD_MUTEX_INITIALIZER; + + +int get_distinct_allocations(Memory* mem) { + if (!mem) { + return 0; + } + return mem->nb_alloc + get_distinct_allocations(mem->next); +} + + +int get_length(Memory* mem) { + if (!mem) { + return 0; + } + return 1 + get_distinct_allocations(mem->next); +} + + +int get_memory_distinct_allocations() { + return get_distinct_allocations(memory); +} + +int get_memory_blocks_number() { + return get_length(memory); +} + + +Memory* create_memory_block(size_t size) { + Memory* mem = (Memory*)malloc(sizeof(Memory)); + #ifdef __CUDACC__ + cudaMallocManaged(&(mem->start), size, cudaMemAttachHost); + #else + mem->start = malloc(size); + #endif + mem->cursor = mem->start; + mem->size = size; + mem->nb_alloc = 0; + mem->next = NULL; + + return mem; +} + + +void* allocate_memory(size_t size, Memory* mem) { + // Si il y a suffisamment de mémoire disponible + if (mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start) >= size) { + void* ptr = mem->cursor; + mem->cursor = (void*)((intptr_t)mem->cursor + size); // On décale le curseur de la taille allouée + mem->nb_alloc++; + return ptr; + } else { + printf("Mémoire disponible: %ld. Nécessaire: %ld\n", mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start), size); + // Sinon on continue sur l'élément suivant de la liste + if (!mem->next) { + mem->next = create_memory_block(MEMORY_BLOCK < size ? size : MEMORY_BLOCK); + } + return allocate_memory(size, mem->next); + } +} + + +Memory* free_memory(void* ptr, Memory* mem) { + if (!mem) { + printf("[ERREUR] Le pointeur %p a déjà été libéré ou n'a jamais été alloué\n", ptr); + return mem; + } + if ((intptr_t)mem->start <= (intptr_t)ptr && (intptr_t)ptr <= (intptr_t)mem->start + (intptr_t)mem->size) { + mem->nb_alloc--; + if (mem->nb_alloc == 0) { + Memory* mem_next = mem->next; + #ifdef __CUDACC__ + cudaFree(mem->start); + #else + free(mem->start); + #endif + free(mem); + return mem_next; + } else { + return mem; + } + } else { + mem->next = free_memory(ptr, mem->next); + return mem; + } +} + + +#ifdef __CUDACC__ +extern "C" +#endif +void* nalloc(size_t sz) { + #if defined(USE_CUDA) || defined(TEST_MEMORY_MANAGEMENT) + pthread_mutex_lock(&memory_lock); + if (!memory) { + // We allocate a new memory block + memory = create_memory_block(MEMORY_BLOCK < sz ? sz : MEMORY_BLOCK); + } + printf("Distinct allocations: %d Blocks: %d\n", get_distinct_allocations(memory), get_length(memory)); + printf("Requested memory of size %ld\n", sz); + void* ptr = allocate_memory(sz, memory); + + pthread_mutex_unlock(&memory_lock); + return ptr; + #else + void* ptr = malloc(sz); + return ptr; + #endif +} + +#ifdef __CUDACC__ +extern "C" +#endif +void gree(void* ptr) { + #if defined(USE_CUDA) || defined(TEST_MEMORY_MANAGEMENT) + pthread_mutex_lock(&memory_lock); + free_memory(ptr, memory); + pthread_mutex_unlock(&memory_lock); + #else + free(ptr); + #endif +} \ No newline at end of file diff --git a/src/utils.c b/src/utils.c index 23f5847..e613a13 100644 --- a/src/utils.c +++ b/src/utils.c @@ -43,37 +43,3 @@ bool check_cuda_compatibility() { return false; #endif } - - -#ifndef USE_CUDA - #ifdef __CUDACC__ - extern "C" - #endif - void* nalloc(size_t sz) { - void* ptr = malloc(sz); - return ptr; - } - - #ifdef __CUDACC__ - extern "C" - #endif - void gree(void* ptr) { - free(ptr); - } -#else - #ifdef __CUDACC__ - extern "C" - #endif - void* nalloc(size_t sz) { - void* ptr; - cudaMallocManaged(&ptr, sz, cudaMemAttachHost); - return ptr; - } - - #ifdef __CUDACC__ - extern "C" - #endif - void gree(void* ptr) { - cudaFree(ptr); - } -#endif diff --git a/src/utils.cu b/src/utils.cu index 23f5847..e613a13 100644 --- a/src/utils.cu +++ b/src/utils.cu @@ -43,37 +43,3 @@ bool check_cuda_compatibility() { return false; #endif } - - -#ifndef USE_CUDA - #ifdef __CUDACC__ - extern "C" - #endif - void* nalloc(size_t sz) { - void* ptr = malloc(sz); - return ptr; - } - - #ifdef __CUDACC__ - extern "C" - #endif - void gree(void* ptr) { - free(ptr); - } -#else - #ifdef __CUDACC__ - extern "C" - #endif - void* nalloc(size_t sz) { - void* ptr; - cudaMallocManaged(&ptr, sz, cudaMemAttachHost); - return ptr; - } - - #ifdef __CUDACC__ - extern "C" - #endif - void gree(void* ptr) { - cudaFree(ptr); - } -#endif diff --git a/test/cnn_convolution.cu b/test/cnn_convolution.cu index 4bfe4db..9341340 100644 --- a/test/cnn_convolution.cu +++ b/test/cnn_convolution.cu @@ -6,6 +6,7 @@ #include #include +#include "../src/include/memory_management.h" #include "../src/cnn/include/convolution.h" #include "../src/cnn/include/struct.h" #include "../src/include/colors.h" diff --git a/test/cnn_matrix_multiplication.cu b/test/cnn_matrix_multiplication.cu index cd322b8..c87fe6d 100644 --- a/test/cnn_matrix_multiplication.cu +++ b/test/cnn_matrix_multiplication.cu @@ -6,6 +6,7 @@ #include #include "../src/cnn/include/matrix_multiplication.h" +#include "../src/include/memory_management.h" #include "../src/include/colors.h" #include "../src/include/utils.h" diff --git a/test/memory_management.c b/test/memory_management.c new file mode 100644 index 0000000..f7e7d2f --- /dev/null +++ b/test/memory_management.c @@ -0,0 +1,70 @@ +#include +#include +#include + +#include "../src/include/memory_management.h" +#include "../src/include/colors.h" + +#define N 350 + +int main() { + printf("Pollution de la mémoire\n"); + int mem_used; + int blocks_used; + // We pollute a little bit the memory before the tests + int* pointeurs[N]; + for (int i=1; i < N; i++) { + pointeurs[i] = nalloc(i*sizeof(int)); + for (int j=0; j < i; j++) { + pointeurs[i][j] = i; + } + } + + // We test in a first place that one simple allocation works as expected + mem_used = get_memory_distinct_allocations(); + blocks_used = get_memory_blocks_number(); + void* ptr = nalloc(15); + if (! (get_memory_distinct_allocations() <= mem_used+1)) { + printf_error("Plus d'un élément de mémoire alloué en une seule allocation\n"); + exit(1); + } + gree(ptr); + if (! (get_memory_blocks_number() == blocks_used)) { + printf_error("La mémoire n'a pas été libérée correctement\n"); + exit(1); + } + printf(GREEN "OK\n" RESET); + + + + printf("Allocation de deux demi-blocs\n"); + // We test that we do not use too much blocks + blocks_used = get_memory_blocks_number(); + void* ptr1 = nalloc(-1+MEMORY_BLOCK/2); + void* ptr2 = nalloc(-1+MEMORY_BLOCK/2); + if (! (get_memory_blocks_number() <= blocks_used +1)) { + printf_error("Trop de blocs ont été alloués par rapport à la mémoire nécessaire\n"); + exit(1); + } + printf(GREEN "OK\n" RESET); + + + + printf("Libération de la mémoire\n"); + for (int i=1; i < N; i++) { + for (int j=0; j < i; j++) { + // We test that the memory does not overlap itself + assert(pointeurs[i][j] == i); + } + gree(pointeurs[i]); + } + + gree(ptr1); + gree(ptr2); + if (! (get_memory_distinct_allocations() == 0 && get_memory_blocks_number() == 0)) { + printf_error("La mémoire n'a pas été libérée correctement\n"); + exit(1); + } + printf(GREEN "OK\n" RESET); + return 0; +} \ No newline at end of file