Add custom memory_management

This commit is contained in:
augustin64 2023-02-18 13:03:08 +01:00
parent 866e2f9a16
commit 795d8b68d7
9 changed files with 400 additions and 83 deletions

View File

@ -30,7 +30,7 @@ LD_CFLAGS = -lm -lpthread -ljpeg -fopenmp
LD_NVCCFLAGS = -ljpeg -Xcompiler -fopenmp LD_NVCCFLAGS = -ljpeg -Xcompiler -fopenmp
# Compilation flag # Compilation flag
CFLAGS = -Wall -Wextra -std=gnu99 -g CFLAGS = -Wall -Wextra -std=gnu99 -g -O3
NVCCFLAGS = -g NVCCFLAGS = -g
# Remove warnings about unused variables, functions, ... # Remove warnings about unused variables, functions, ...
# -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable # -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable
@ -87,6 +87,7 @@ $(BUILDDIR)/cnn-main: $(CNN_SRCDIR)/main.c \
$(BUILDDIR)/cnn_jpeg.o \ $(BUILDDIR)/cnn_jpeg.o \
$(BUILDDIR)/cnn_convolution.o \ $(BUILDDIR)/cnn_convolution.o \
$(BUILDDIR)/cnn_backpropagation.o \ $(BUILDDIR)/cnn_backpropagation.o \
$(BUILDDIR)/memory_management.o \
$(BUILDDIR)/colors.o \ $(BUILDDIR)/colors.o \
$(BUILDDIR)/mnist.o \ $(BUILDDIR)/mnist.o \
$(BUILDDIR)/utils.o $(BUILDDIR)/utils.o
@ -109,6 +110,7 @@ $(BUILDDIR)/cnn-main-cuda: $(BUILDDIR)/cnn_main.cuda.o \
$(BUILDDIR)/cnn_cuda_convolution.o \ $(BUILDDIR)/cnn_cuda_convolution.o \
$(BUILDDIR)/cnn_backpropagation.cuda.o \ $(BUILDDIR)/cnn_backpropagation.cuda.o \
$(BUILDDIR)/colors.cuda.o \ $(BUILDDIR)/colors.cuda.o \
$(BUILDDIR)/cuda_memory_management.o \
$(BUILDDIR)/mnist.cuda.o \ $(BUILDDIR)/mnist.cuda.o \
$(BUILDDIR)/cuda_utils.o $(BUILDDIR)/cuda_utils.o
$(NVCC) $(LD_NVCCFLAGS) $(NVCCFLAGS) $^ -o $@ $(NVCC) $(LD_NVCCFLAGS) $(NVCCFLAGS) $^ -o $@
@ -120,7 +122,7 @@ endif
$(BUILDDIR)/cnn-preview: $(CNN_SRCDIR)/preview.c $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/colors.o $(BUILDDIR)/utils.o $(BUILDDIR)/cnn-preview: $(CNN_SRCDIR)/preview.c $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/colors.o $(BUILDDIR)/utils.o
$(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS) $(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS)
$(BUILDDIR)/cnn-export: $(CNN_SRCDIR)/export.c $(BUILDDIR)/cnn_free.o $(BUILDDIR)/cnn_neuron_io.o $(BUILDDIR)/utils.o $(BUILDDIR)/cnn-export: $(CNN_SRCDIR)/export.c $(BUILDDIR)/cnn_free.o $(BUILDDIR)/cnn_neuron_io.o $(BUILDDIR)/utils.o $(BUILDDIR)/memory_management.o $(BUILDDIR)/colors.o
$(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS) $(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS)
$(BUILDDIR)/cnn_%.o: $(CNN_SRCDIR)/%.c $(CNN_SRCDIR)/include/%.h $(BUILDDIR)/cnn_%.o: $(CNN_SRCDIR)/%.c $(CNN_SRCDIR)/include/%.h
@ -166,19 +168,26 @@ prepare-tests:
@rm -f $(BUILDDIR)/test-* @rm -f $(BUILDDIR)/test-*
build/test-cnn_%: $(TEST_SRCDIR)/cnn_%.c $(CNN_OBJ) $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o $(BUILDDIR)/utils.o build/test-cnn_%: $(TEST_SRCDIR)/cnn_%.c $(CNN_OBJ) $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o $(BUILDDIR)/utils.o $(BUILDDIR)/memory_management.o
$(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS) $(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS)
# mnist.o est déjà inclus en tant que mnist_mnist.o # mnist.o est déjà inclus en tant que mnist_mnist.o
build/test-mnist_%: $(TEST_SRCDIR)/mnist_%.c $(MNIST_OBJ) $(BUILDDIR)/colors.o build/test-mnist_%: $(TEST_SRCDIR)/mnist_%.c $(MNIST_OBJ) $(BUILDDIR)/colors.o
$(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS) $(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS)
build/test-memory_management: $(TEST_SRCDIR)/memory_management.c $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o $(BUILDDIR)/utils.o $(BUILDDIR)/test_memory_management.o
$(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS)
$(BUILDDIR)/test_memory_management.o: $(SRCDIR)/memory_management.c $(SRCDIR)/include/memory_management.h
$(CC) -c $< -o $@ $(CFLAGS) -DTEST_MEMORY_MANAGEMENT
ifdef NVCC_INSTALLED ifdef NVCC_INSTALLED
$(BUILDDIR)/test-cnn_%: $(TEST_SRCDIR)/cnn_%.cu \ $(BUILDDIR)/test-cnn_%: $(TEST_SRCDIR)/cnn_%.cu \
$(BUILDDIR)/cnn_cuda_%.o \ $(BUILDDIR)/cnn_cuda_%.o \
$(BUILDDIR)/cuda_utils.o \ $(BUILDDIR)/cuda_utils.o \
$(BUILDDIR)/colors.o \ $(BUILDDIR)/colors.o \
$(BUILDDIR)/mnist.cuda.o $(BUILDDIR)/mnist.cuda.o \
$(BUILDDIR)/cuda_memory_management.o
$(NVCC) $(LD_NVCCFLAGS) $(NVCCFLAGS) $^ -o $@ $(NVCC) $(LD_NVCCFLAGS) $(NVCCFLAGS) $^ -o $@
else else
$(BUILDDIR)/test-cnn_%: $(TEST_SRCDIR)/cnn_%.cu $(BUILDDIR)/test-cnn_%: $(TEST_SRCDIR)/cnn_%.cu

View File

@ -3,8 +3,8 @@
#include <stdbool.h> #include <stdbool.h>
#include <string.h> #include <string.h>
#include "../include/memory_management.h"
#include "../include/colors.h" #include "../include/colors.h"
#include "../include/utils.h"
#include "include/struct.h" #include "include/struct.h"
#define copyVar(var) network_cp->var = network->var #define copyVar(var) network_cp->var = network->var

View File

@ -0,0 +1,52 @@
#include <stdio.h>
#include <stdbool.h>
#ifndef DEF_MEM_MANAGEMENT_H
#define DEF_MEM_MANAGEMENT_H
// A block of memory is 48kB
// https://forums.developer.nvidia.com/t/find-the-limit-of-shared-memory-that-can-be-used-per-block/48556
#define MEMORY_BLOCK 49152
// We define our memory with a linked list of memory blocks
typedef struct Memory {
void* start; // Start of the allocated memory
void* cursor; // Current cursor
size_t size; // Taille de la mémoire allouée
int nb_alloc; // Nombre d'allocations dans le bloc
struct Memory* next; // Élément suivant
} Memory;
// Renvoie le nombre d'allocations totales dans la mémoire
int get_memory_distinct_allocations();
// Fonction récursive correspondante
int get_distinct_allocations(Memory* mem);
// Renvoie le nombre d'éléments dans la liste chaînée représentant la mémoire
int get_memory_blocks_number();
// Renvoie la taille d'une liste chaînée
int get_length(Memory* mem);
// Créer un bloc de mémoire de taille size
Memory* create_memory_block(size_t size);
// Allouer un élément de taille size dans mem
void* allocate_memory(size_t size, Memory* mem);
// Essayer de libérer le pointeur représenté par ptr dans mem
Memory* free_memory(void* ptr, Memory* mem);
#ifdef __CUDACC__
extern "C"
#endif
void* nalloc(size_t sz);
#ifdef __CUDACC__
extern "C"
#endif
void gree(void* ptr);
#endif

View File

@ -38,14 +38,4 @@ extern "C"
#endif #endif
bool check_cuda_compatibility(); bool check_cuda_compatibility();
#ifdef __CUDACC__
extern "C"
#endif
void* nalloc(size_t sz);
#ifdef __CUDACC__
extern "C"
#endif
void gree(void* ptr);
#endif #endif

132
src/memory_management.c Normal file
View File

@ -0,0 +1,132 @@
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <pthread.h>
#include "include/memory_management.h"
#include "include/colors.h"
Memory* memory = NULL;
pthread_mutex_t memory_lock = PTHREAD_MUTEX_INITIALIZER;
int get_distinct_allocations(Memory* mem) {
if (!mem) {
return 0;
}
return mem->nb_alloc + get_distinct_allocations(mem->next);
}
int get_length(Memory* mem) {
if (!mem) {
return 0;
}
return 1 + get_distinct_allocations(mem->next);
}
int get_memory_distinct_allocations() {
return get_distinct_allocations(memory);
}
int get_memory_blocks_number() {
return get_length(memory);
}
Memory* create_memory_block(size_t size) {
Memory* mem = (Memory*)malloc(sizeof(Memory));
#ifdef __CUDACC__
cudaMallocManaged(&(mem->start), size, cudaMemAttachHost);
#else
mem->start = malloc(size);
#endif
mem->cursor = mem->start;
mem->size = size;
mem->nb_alloc = 0;
mem->next = NULL;
return mem;
}
void* allocate_memory(size_t size, Memory* mem) {
// Si il y a suffisamment de mémoire disponible
if (mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start) >= size) {
void* ptr = mem->cursor;
mem->cursor = (void*)((intptr_t)mem->cursor + size); // On décale le curseur de la taille allouée
mem->nb_alloc++;
return ptr;
} else {
printf("Mémoire disponible: %ld. Nécessaire: %ld\n", mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start), size);
// Sinon on continue sur l'élément suivant de la liste
if (!mem->next) {
mem->next = create_memory_block(MEMORY_BLOCK < size ? size : MEMORY_BLOCK);
}
return allocate_memory(size, mem->next);
}
}
Memory* free_memory(void* ptr, Memory* mem) {
if (!mem) {
printf("[ERREUR] Le pointeur %p a déjà été libéré ou n'a jamais été alloué\n", ptr);
return mem;
}
if ((intptr_t)mem->start <= (intptr_t)ptr && (intptr_t)ptr <= (intptr_t)mem->start + (intptr_t)mem->size) {
mem->nb_alloc--;
if (mem->nb_alloc == 0) {
Memory* mem_next = mem->next;
#ifdef __CUDACC__
cudaFree(mem->start);
#else
free(mem->start);
#endif
free(mem);
return mem_next;
} else {
return mem;
}
} else {
mem->next = free_memory(ptr, mem->next);
return mem;
}
}
#ifdef __CUDACC__
extern "C"
#endif
void* nalloc(size_t sz) {
#if defined(USE_CUDA) || defined(TEST_MEMORY_MANAGEMENT)
pthread_mutex_lock(&memory_lock);
if (!memory) {
// We allocate a new memory block
memory = create_memory_block(MEMORY_BLOCK < sz ? sz : MEMORY_BLOCK);
}
printf("Distinct allocations: %d Blocks: %d\n", get_distinct_allocations(memory), get_length(memory));
printf("Requested memory of size %ld\n", sz);
void* ptr = allocate_memory(sz, memory);
pthread_mutex_unlock(&memory_lock);
return ptr;
#else
void* ptr = malloc(sz);
return ptr;
#endif
}
#ifdef __CUDACC__
extern "C"
#endif
void gree(void* ptr) {
#if defined(USE_CUDA) || defined(TEST_MEMORY_MANAGEMENT)
pthread_mutex_lock(&memory_lock);
free_memory(ptr, memory);
pthread_mutex_unlock(&memory_lock);
#else
free(ptr);
#endif
}

132
src/memory_management.cu Normal file
View File

@ -0,0 +1,132 @@
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <pthread.h>
#include "include/memory_management.h"
#include "include/colors.h"
Memory* memory = NULL;
pthread_mutex_t memory_lock = PTHREAD_MUTEX_INITIALIZER;
int get_distinct_allocations(Memory* mem) {
if (!mem) {
return 0;
}
return mem->nb_alloc + get_distinct_allocations(mem->next);
}
int get_length(Memory* mem) {
if (!mem) {
return 0;
}
return 1 + get_distinct_allocations(mem->next);
}
int get_memory_distinct_allocations() {
return get_distinct_allocations(memory);
}
int get_memory_blocks_number() {
return get_length(memory);
}
Memory* create_memory_block(size_t size) {
Memory* mem = (Memory*)malloc(sizeof(Memory));
#ifdef __CUDACC__
cudaMallocManaged(&(mem->start), size, cudaMemAttachHost);
#else
mem->start = malloc(size);
#endif
mem->cursor = mem->start;
mem->size = size;
mem->nb_alloc = 0;
mem->next = NULL;
return mem;
}
void* allocate_memory(size_t size, Memory* mem) {
// Si il y a suffisamment de mémoire disponible
if (mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start) >= size) {
void* ptr = mem->cursor;
mem->cursor = (void*)((intptr_t)mem->cursor + size); // On décale le curseur de la taille allouée
mem->nb_alloc++;
return ptr;
} else {
printf("Mémoire disponible: %ld. Nécessaire: %ld\n", mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start), size);
// Sinon on continue sur l'élément suivant de la liste
if (!mem->next) {
mem->next = create_memory_block(MEMORY_BLOCK < size ? size : MEMORY_BLOCK);
}
return allocate_memory(size, mem->next);
}
}
Memory* free_memory(void* ptr, Memory* mem) {
if (!mem) {
printf("[ERREUR] Le pointeur %p a déjà été libéré ou n'a jamais été alloué\n", ptr);
return mem;
}
if ((intptr_t)mem->start <= (intptr_t)ptr && (intptr_t)ptr <= (intptr_t)mem->start + (intptr_t)mem->size) {
mem->nb_alloc--;
if (mem->nb_alloc == 0) {
Memory* mem_next = mem->next;
#ifdef __CUDACC__
cudaFree(mem->start);
#else
free(mem->start);
#endif
free(mem);
return mem_next;
} else {
return mem;
}
} else {
mem->next = free_memory(ptr, mem->next);
return mem;
}
}
#ifdef __CUDACC__
extern "C"
#endif
void* nalloc(size_t sz) {
#if defined(USE_CUDA) || defined(TEST_MEMORY_MANAGEMENT)
pthread_mutex_lock(&memory_lock);
if (!memory) {
// We allocate a new memory block
memory = create_memory_block(MEMORY_BLOCK < sz ? sz : MEMORY_BLOCK);
}
printf("Distinct allocations: %d Blocks: %d\n", get_distinct_allocations(memory), get_length(memory));
printf("Requested memory of size %ld\n", sz);
void* ptr = allocate_memory(sz, memory);
pthread_mutex_unlock(&memory_lock);
return ptr;
#else
void* ptr = malloc(sz);
return ptr;
#endif
}
#ifdef __CUDACC__
extern "C"
#endif
void gree(void* ptr) {
#if defined(USE_CUDA) || defined(TEST_MEMORY_MANAGEMENT)
pthread_mutex_lock(&memory_lock);
free_memory(ptr, memory);
pthread_mutex_unlock(&memory_lock);
#else
free(ptr);
#endif
}

View File

@ -43,37 +43,3 @@ bool check_cuda_compatibility() {
return false; return false;
#endif #endif
} }
#ifndef USE_CUDA
#ifdef __CUDACC__
extern "C"
#endif
void* nalloc(size_t sz) {
void* ptr = malloc(sz);
return ptr;
}
#ifdef __CUDACC__
extern "C"
#endif
void gree(void* ptr) {
free(ptr);
}
#else
#ifdef __CUDACC__
extern "C"
#endif
void* nalloc(size_t sz) {
void* ptr;
cudaMallocManaged(&ptr, sz, cudaMemAttachHost);
return ptr;
}
#ifdef __CUDACC__
extern "C"
#endif
void gree(void* ptr) {
cudaFree(ptr);
}
#endif

View File

@ -43,37 +43,3 @@ bool check_cuda_compatibility() {
return false; return false;
#endif #endif
} }
#ifndef USE_CUDA
#ifdef __CUDACC__
extern "C"
#endif
void* nalloc(size_t sz) {
void* ptr = malloc(sz);
return ptr;
}
#ifdef __CUDACC__
extern "C"
#endif
void gree(void* ptr) {
free(ptr);
}
#else
#ifdef __CUDACC__
extern "C"
#endif
void* nalloc(size_t sz) {
void* ptr;
cudaMallocManaged(&ptr, sz, cudaMemAttachHost);
return ptr;
}
#ifdef __CUDACC__
extern "C"
#endif
void gree(void* ptr) {
cudaFree(ptr);
}
#endif

70
test/memory_management.c Normal file
View File

@ -0,0 +1,70 @@
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include "../src/include/memory_management.h"
#include "../src/include/colors.h"
#define N 350
int main() {
printf("Pollution de la mémoire\n");
int mem_used;
int blocks_used;
// We pollute a little bit the memory before the tests
int* pointeurs[N];
for (int i=1; i < N; i++) {
pointeurs[i] = nalloc(i*sizeof(int));
for (int j=0; j < i; j++) {
pointeurs[i][j] = i;
}
}
// We test in a first place that one simple allocation works as expected
mem_used = get_memory_distinct_allocations();
blocks_used = get_memory_blocks_number();
void* ptr = nalloc(15);
if (! (get_memory_distinct_allocations() <= mem_used+1)) {
printf_error("Plus d'un élément de mémoire alloué en une seule allocation\n");
exit(1);
}
gree(ptr);
if (! (get_memory_blocks_number() == blocks_used)) {
printf_error("La mémoire n'a pas été libérée correctement\n");
exit(1);
}
printf(GREEN "OK\n" RESET);
printf("Allocation de deux demi-blocs\n");
// We test that we do not use too much blocks
blocks_used = get_memory_blocks_number();
void* ptr1 = nalloc(-1+MEMORY_BLOCK/2);
void* ptr2 = nalloc(-1+MEMORY_BLOCK/2);
if (! (get_memory_blocks_number() <= blocks_used +1)) {
printf_error("Trop de blocs ont été alloués par rapport à la mémoire nécessaire\n");
exit(1);
}
printf(GREEN "OK\n" RESET);
printf("Libération de la mémoire\n");
for (int i=1; i < N; i++) {
for (int j=0; j < i; j++) {
// We test that the memory does not overlap itself
assert(pointeurs[i][j] == i);
}
gree(pointeurs[i]);
}
gree(ptr1);
gree(ptr2);
if (! (get_memory_distinct_allocations() == 0 && get_memory_blocks_number() == 0)) {
printf_error("La mémoire n'a pas été libérée correctement\n");
exit(1);
}
printf(GREEN "OK\n" RESET);
return 0;
}