mem management: Add optional tail optimisation

VGG16: memory usage increases by ~1%,
but initialisation time (for CUDA) drops from 1h to 2.4s
This commit is contained in:
augustin64 2023-05-15 10:07:00 +02:00
parent 003183d3fd
commit 4cffcc1c95
3 changed files with 43 additions and 2 deletions

View File

@ -8,6 +8,11 @@
// Size of one memory block (49152 = 48 * 1024; presumably bytes — confirm against the allocator).
// The value follows the per-block shared memory limit discussed here:
// https://forums.developer.nvidia.com/t/find-the-limit-of-shared-memory-that-can-be-used-per-block/48556
#define MEMORY_BLOCK 49152
// Memory is only allocated in the most recently created block, so the list is not traversed.
// This slightly increases memory usage, but saves a considerable amount of time.
// For VGG16, roughly 1% more memory is used,
// while initialisation goes from 1h02 down to 2.4s on the author's hardware.
// Comment this definition out to fall back to allocating from the start of the list.
#define MEMORY_TAIL_OPT
// We define our memory with a linked list of memory blocks
typedef struct Memory {

View File

@ -8,8 +8,12 @@
#include "include/utils.h"
// Global list of memory blocks (presumably its head — confirm); nalloc passes it to
// allocate_memory when MEMORY_TAIL_OPT is not defined.
// NOTE(review): this definition appears twice in this view (here and below the mutex) —
// presumably the removed and re-added line of the diff; confirm the real file has only one.
Memory* memory = NULL;
// Mutex that nalloc unlocks once its allocation is done (presumably locked at its start — confirm).
pthread_mutex_t memory_lock = PTHREAD_MUTEX_INITIALIZER;
Memory* memory = NULL;
#ifdef MEMORY_TAIL_OPT
// Block that nalloc hands to allocate_memory instead of `memory`: create_memory_block points it
// at each block it builds, and free_memory resets it to `memory` when the block being released
// is the current tail.
Memory* tail = NULL;
#endif
int get_distinct_allocations(Memory* mem) {
@ -68,6 +72,9 @@ Memory* create_memory_block(size_t size) {
mem->nb_alloc = 0;
mem->next = NULL;
mem->id = rand() %100000;
#ifdef MEMORY_TAIL_OPT
tail = mem;
#endif
return mem;
}
@ -116,6 +123,13 @@ Memory* free_memory(void* ptr, Memory* mem) {
// printf(GREEN "%p <= %p < %p\n" RESET, mem->start, ptr, (void*)((intptr_t)mem->start + mem->size));
if (mem->nb_alloc == 0) {
Memory* mem_next = mem->next;
#ifdef MEMORY_TAIL_OPT
if (tail == mem) {
tail = memory;
}
#endif
#ifdef __CUDACC__
cudaFree(mem->start);
#else
@ -145,7 +159,11 @@ void* nalloc(int nb_elements, size_t size) {
}
//printf("Distinct allocations: %d Blocks: %d\n", get_distinct_allocations(memory), get_length(memory));
//printf("Requested memory of size %ld\n", sz);
#ifdef MEMORY_TAIL_OPT
void* ptr = allocate_memory(nb_elements, size, tail);
#else
void* ptr = allocate_memory(nb_elements, size, memory);
#endif
pthread_mutex_unlock(&memory_lock);
return ptr;

View File

@ -8,8 +8,12 @@
#include "include/utils.h"
// Global list of memory blocks (presumably its head — confirm); nalloc passes it to
// allocate_memory when MEMORY_TAIL_OPT is not defined.
// NOTE(review): this definition appears twice in this view (here and below the mutex) —
// presumably the removed and re-added line of the diff; confirm the real file has only one.
Memory* memory = NULL;
// Mutex that nalloc unlocks once its allocation is done (presumably locked at its start — confirm).
pthread_mutex_t memory_lock = PTHREAD_MUTEX_INITIALIZER;
Memory* memory = NULL;
#ifdef MEMORY_TAIL_OPT
// Block that nalloc hands to allocate_memory instead of `memory`: create_memory_block points it
// at each block it builds, and free_memory resets it to `memory` when the block being released
// is the current tail.
Memory* tail = NULL;
#endif
int get_distinct_allocations(Memory* mem) {
@ -68,6 +72,9 @@ Memory* create_memory_block(size_t size) {
mem->nb_alloc = 0;
mem->next = NULL;
mem->id = rand() %100000;
#ifdef MEMORY_TAIL_OPT
tail = mem;
#endif
return mem;
}
@ -116,6 +123,13 @@ Memory* free_memory(void* ptr, Memory* mem) {
// printf(GREEN "%p <= %p < %p\n" RESET, mem->start, ptr, (void*)((intptr_t)mem->start + mem->size));
if (mem->nb_alloc == 0) {
Memory* mem_next = mem->next;
#ifdef MEMORY_TAIL_OPT
if (tail == mem) {
tail = memory;
}
#endif
#ifdef __CUDACC__
cudaFree(mem->start);
#else
@ -145,7 +159,11 @@ void* nalloc(int nb_elements, size_t size) {
}
//printf("Distinct allocations: %d Blocks: %d\n", get_distinct_allocations(memory), get_length(memory));
//printf("Requested memory of size %ld\n", sz);
#ifdef MEMORY_TAIL_OPT
void* ptr = allocate_memory(nb_elements, size, tail);
#else
void* ptr = allocate_memory(nb_elements, size, memory);
#endif
pthread_mutex_unlock(&memory_lock);
return ptr;