mirror of
https://github.com/augustin64/projet-tipe
synced 2025-01-23 23:26:25 +01:00
mem management: Add optional tail optimisation
VGG16: memory usage increases by ~1%, but initialisation time (for CUDA) drops from 1h to 2.4s
This commit is contained in:
parent
003183d3fd
commit
4cffcc1c95
@ -8,6 +8,11 @@
|
||||
// https://forums.developer.nvidia.com/t/find-the-limit-of-shared-memory-that-can-be-used-per-block/48556
|
||||
#define MEMORY_BLOCK 49152
|
||||
|
||||
// On n'alloue de la mémoire que dans le dernier bloc créé, on ne parcourt donc pas la liste
|
||||
// Cela augmente légèrement l'utilisation de la mémoire, mais permet un gain de temps conséquent
|
||||
// Pour VGG16, environ 1% de mémoire supplémentaire utilisée,
|
||||
// L'initialisation passe de 1h02 à 2.4s sur mon matériel
|
||||
#define MEMORY_TAIL_OPT
|
||||
|
||||
// We define our memory with a linked list of memory blocks
|
||||
typedef struct Memory {
|
||||
|
@ -8,8 +8,12 @@
|
||||
#include "include/utils.h"
|
||||
|
||||
|
||||
Memory* memory = NULL;
|
||||
pthread_mutex_t memory_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
Memory* memory = NULL;
|
||||
#ifdef MEMORY_TAIL_OPT
|
||||
Memory* tail = NULL;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
int get_distinct_allocations(Memory* mem) {
|
||||
@ -68,6 +72,9 @@ Memory* create_memory_block(size_t size) {
|
||||
mem->nb_alloc = 0;
|
||||
mem->next = NULL;
|
||||
mem->id = rand() %100000;
|
||||
#ifdef MEMORY_TAIL_OPT
|
||||
tail = mem;
|
||||
#endif
|
||||
|
||||
return mem;
|
||||
}
|
||||
@ -116,6 +123,13 @@ Memory* free_memory(void* ptr, Memory* mem) {
|
||||
// printf(GREEN "%p <= %p < %p\n" RESET, mem->start, ptr, (void*)((intptr_t)mem->start + mem->size));
|
||||
if (mem->nb_alloc == 0) {
|
||||
Memory* mem_next = mem->next;
|
||||
|
||||
#ifdef MEMORY_TAIL_OPT
|
||||
if (tail == mem) {
|
||||
tail = memory;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __CUDACC__
|
||||
cudaFree(mem->start);
|
||||
#else
|
||||
@ -145,7 +159,11 @@ void* nalloc(int nb_elements, size_t size) {
|
||||
}
|
||||
//printf("Distinct allocations: %d Blocks: %d\n", get_distinct_allocations(memory), get_length(memory));
|
||||
//printf("Requested memory of size %ld\n", sz);
|
||||
#ifdef MEMORY_TAIL_OPT
|
||||
void* ptr = allocate_memory(nb_elements, size, tail);
|
||||
#else
|
||||
void* ptr = allocate_memory(nb_elements, size, memory);
|
||||
#endif
|
||||
|
||||
pthread_mutex_unlock(&memory_lock);
|
||||
return ptr;
|
||||
|
@ -8,8 +8,12 @@
|
||||
#include "include/utils.h"
|
||||
|
||||
|
||||
Memory* memory = NULL;
|
||||
pthread_mutex_t memory_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
Memory* memory = NULL;
|
||||
#ifdef MEMORY_TAIL_OPT
|
||||
Memory* tail = NULL;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
int get_distinct_allocations(Memory* mem) {
|
||||
@ -68,6 +72,9 @@ Memory* create_memory_block(size_t size) {
|
||||
mem->nb_alloc = 0;
|
||||
mem->next = NULL;
|
||||
mem->id = rand() %100000;
|
||||
#ifdef MEMORY_TAIL_OPT
|
||||
tail = mem;
|
||||
#endif
|
||||
|
||||
return mem;
|
||||
}
|
||||
@ -116,6 +123,13 @@ Memory* free_memory(void* ptr, Memory* mem) {
|
||||
// printf(GREEN "%p <= %p < %p\n" RESET, mem->start, ptr, (void*)((intptr_t)mem->start + mem->size));
|
||||
if (mem->nb_alloc == 0) {
|
||||
Memory* mem_next = mem->next;
|
||||
|
||||
#ifdef MEMORY_TAIL_OPT
|
||||
if (tail == mem) {
|
||||
tail = memory;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __CUDACC__
|
||||
cudaFree(mem->start);
|
||||
#else
|
||||
@ -145,7 +159,11 @@ void* nalloc(int nb_elements, size_t size) {
|
||||
}
|
||||
//printf("Distinct allocations: %d Blocks: %d\n", get_distinct_allocations(memory), get_length(memory));
|
||||
//printf("Requested memory of size %ld\n", sz);
|
||||
#ifdef MEMORY_TAIL_OPT
|
||||
void* ptr = allocate_memory(nb_elements, size, tail);
|
||||
#else
|
||||
void* ptr = allocate_memory(nb_elements, size, memory);
|
||||
#endif
|
||||
|
||||
pthread_mutex_unlock(&memory_lock);
|
||||
return ptr;
|
||||
|
Loading…
Reference in New Issue
Block a user