diff --git a/src/common/include/memory_management.h b/src/common/include/memory_management.h
index 008a2b0..6f28cce 100644
--- a/src/common/include/memory_management.h
+++ b/src/common/include/memory_management.h
@@ -8,6 +8,11 @@
 // https://forums.developer.nvidia.com/t/find-the-limit-of-shared-memory-that-can-be-used-per-block/48556
 #define MEMORY_BLOCK 49152
 
+// Memory is only allocated from the most recently created block, so the block list is never traversed.
+// This slightly increases memory usage but saves a considerable amount of time.
+// For VGG16, roughly 1% more memory is used,
+// and initialization goes from 1h02 down to 2.4s on my hardware.
+#define MEMORY_TAIL_OPT
 
 // We define our memory with a linked list of memory blocks
 typedef struct Memory {
diff --git a/src/common/memory_management.c b/src/common/memory_management.c
index c895694..5a403dc 100644
--- a/src/common/memory_management.c
+++ b/src/common/memory_management.c
@@ -8,8 +8,12 @@
 #include "include/utils.h"
 
 
-Memory* memory = NULL;
 pthread_mutex_t memory_lock = PTHREAD_MUTEX_INITIALIZER;
+Memory* memory = NULL; // first block of the linked list; nalloc starts from it when MEMORY_TAIL_OPT is off
+#ifdef MEMORY_TAIL_OPT
+    Memory* tail = NULL; // block nalloc hands to allocate_memory; set by create_memory_block, reset in free_memory
+#endif
+
 
 
 int get_distinct_allocations(Memory* mem) {
@@ -68,6 +72,9 @@ Memory* create_memory_block(size_t size) {
     mem->nb_alloc = 0;
     mem->next = NULL;
     mem->id = rand() %100000;
+    #ifdef MEMORY_TAIL_OPT
+    tail = mem;
+    #endif
     
     return mem;
 }
@@ -116,6 +123,13 @@ Memory* free_memory(void* ptr, Memory* mem) {
         // printf(GREEN "%p <= %p < %p\n" RESET, mem->start, ptr, (void*)((intptr_t)mem->start + mem->size));
         if (mem->nb_alloc == 0) {
             Memory* mem_next = mem->next;
+
+            #ifdef MEMORY_TAIL_OPT
+            // mem is being released: if it is also the list head, use its successor rather than keep a pointer to it
+            if (tail == mem) {
+                tail = (memory == mem) ? mem_next : memory;
+            }
+            #endif
             #ifdef __CUDACC__
             cudaFree(mem->start);
             #else
@@ -145,7 +159,11 @@ void* nalloc(int nb_elements, size_t size) {
         }
         //printf("Distinct allocations: %d Blocks: %d\n", get_distinct_allocations(memory), get_length(memory));
         //printf("Requested memory of size %ld\n", sz);
+        #ifdef MEMORY_TAIL_OPT
+        void* ptr = allocate_memory(nb_elements, size, tail);
+        #else
         void* ptr = allocate_memory(nb_elements, size, memory);
+        #endif
 
         pthread_mutex_unlock(&memory_lock);
         return ptr;
diff --git a/src/common/memory_management.cu b/src/common/memory_management.cu
index c895694..5a403dc 100644
--- a/src/common/memory_management.cu
+++ b/src/common/memory_management.cu
@@ -8,8 +8,12 @@
 #include "include/utils.h"
 
 
-Memory* memory = NULL;
 pthread_mutex_t memory_lock = PTHREAD_MUTEX_INITIALIZER;
+Memory* memory = NULL; // first block of the linked list; nalloc starts from it when MEMORY_TAIL_OPT is off
+#ifdef MEMORY_TAIL_OPT
+    Memory* tail = NULL; // block nalloc hands to allocate_memory; set by create_memory_block, reset in free_memory
+#endif
+
 
 
 int get_distinct_allocations(Memory* mem) {
@@ -68,6 +72,9 @@ Memory* create_memory_block(size_t size) {
     mem->nb_alloc = 0;
     mem->next = NULL;
     mem->id = rand() %100000;
+    #ifdef MEMORY_TAIL_OPT
+    tail = mem;
+    #endif
     
     return mem;
 }
@@ -116,6 +123,13 @@ Memory* free_memory(void* ptr, Memory* mem) {
         // printf(GREEN "%p <= %p < %p\n" RESET, mem->start, ptr, (void*)((intptr_t)mem->start + mem->size));
         if (mem->nb_alloc == 0) {
             Memory* mem_next = mem->next;
+
+            #ifdef MEMORY_TAIL_OPT
+            // mem is being released: if it is also the list head, use its successor rather than keep a pointer to it
+            if (tail == mem) {
+                tail = (memory == mem) ? mem_next : memory;
+            }
+            #endif
             #ifdef __CUDACC__
             cudaFree(mem->start);
             #else
@@ -145,7 +159,11 @@ void* nalloc(int nb_elements, size_t size) {
         }
         //printf("Distinct allocations: %d Blocks: %d\n", get_distinct_allocations(memory), get_length(memory));
         //printf("Requested memory of size %ld\n", sz);
+        #ifdef MEMORY_TAIL_OPT
+        void* ptr = allocate_memory(nb_elements, size, tail);
+        #else
         void* ptr = allocate_memory(nb_elements, size, memory);
+        #endif
 
         pthread_mutex_unlock(&memory_lock);
         return ptr;