mem management: add faster way to free all memory

This commit is contained in:
augustin64 2023-05-15 10:44:09 +02:00
parent 4cffcc1c95
commit 19005366d3
12 changed files with 227 additions and 134 deletions

View File

@ -234,7 +234,7 @@ void backward_propagation(Network* network, int wanted_number) {
// Backward sur la dernière couche qui utilise toujours SOFTMAX
float* wanted_output = generate_wanted_output(wanted_number, network->width[network->size -1]); // Sortie désirée, permet d'initialiser une erreur
softmax_backward_cross_entropy(network->input[n-1][0][0], wanted_output, network->width[n-1]);
gree(wanted_output);
gree(wanted_output, false);
/*
* On propage à chaque étape:

View File

@ -1,3 +1,4 @@
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
@ -8,25 +9,25 @@
/*
 * Release the storage behind network->input[pos] and network->input_z[pos].
 * For each of the `depth` slices, the `dim` innermost rows are released first,
 * then the slice itself, and finally the top-level array of the layer.
 *
 * NOTE(review): this is a diff hunk, so every one-argument gree(...) line is
 * the pre-change call and the two-argument line that follows is its
 * replacement. The second argument is gree's `already_freed` flag.
 */
void free_a_cube_input_layer(Network* network, int pos, int depth, int dim) {
for (int i=0; i < depth; i++) {
for (int j=0; j < dim; j++) {
gree(network->input[pos][i][j]);
gree(network->input_z[pos][i][j]);
gree(network->input[pos][i][j], true);
gree(network->input_z[pos][i][j], true);
}
gree(network->input[pos][i]);
gree(network->input_z[pos][i]);
gree(network->input[pos][i], true);
gree(network->input_z[pos][i], true);
}
gree(network->input[pos]);
gree(network->input_z[pos]);
gree(network->input[pos], true);
gree(network->input_z[pos], true);
}
void free_a_line_input_layer(Network* network, int pos) {
// Frees the memory of network->input[pos] and network->input_z[pos]
// when these layers are dense (and are therefore matrices of dimension 1).
// Only index [0][0] and [0] are released, innermost first, then the layer array.
// NOTE(review): diff hunk: the one-argument gree(...) lines are the pre-change
// calls, the two-argument lines are their replacements.
gree(network->input[pos][0][0]);
gree(network->input_z[pos][0][0]);
gree(network->input[pos][0]);
gree(network->input_z[pos][0]);
gree(network->input[pos]);
gree(network->input_z[pos]);
gree(network->input[pos][0][0], true);
gree(network->input_z[pos][0][0], true);
gree(network->input[pos][0], true);
gree(network->input_z[pos][0], true);
gree(network->input[pos], true);
gree(network->input_z[pos], true);
}
void free_pooling(Network* network, int pos) {
@ -43,59 +44,59 @@ void free_convolution(Network* network, int pos) {
free_a_cube_input_layer(network, pos+1, network->depth[pos+1], network->width[pos+1]);
for (int i=0; i < c; i++) {
for (int j=0; j < bias_size; j++) {
gree(k_pos->bias[i][j]);
gree(k_pos->d_bias[i][j]);
gree(k_pos->bias[i][j], true);
gree(k_pos->d_bias[i][j], true);
#ifdef ADAM_CNN_BIAS
gree(k_pos->s_d_bias[i][j]);
gree(k_pos->v_d_bias[i][j]);
gree(k_pos->s_d_bias[i][j], true);
gree(k_pos->v_d_bias[i][j], true);
#endif
}
gree(k_pos->bias[i]);
gree(k_pos->d_bias[i]);
gree(k_pos->bias[i], true);
gree(k_pos->d_bias[i], true);
#ifdef ADAM_CNN_BIAS
gree(k_pos->s_d_bias[i]);
gree(k_pos->v_d_bias[i]);
gree(k_pos->s_d_bias[i], true);
gree(k_pos->v_d_bias[i], true);
#endif
}
gree(k_pos->bias);
gree(k_pos->d_bias);
gree(k_pos->bias, true);
gree(k_pos->d_bias, true);
#ifdef ADAM_CNN_BIAS
gree(k_pos->s_d_bias);
gree(k_pos->v_d_bias);
gree(k_pos->s_d_bias, true);
gree(k_pos->v_d_bias, true);
#endif
for (int i=0; i < r; i++) {
for (int j=0; j < c; j++) {
for (int k=0; k < k_size; k++) {
gree(k_pos->weights[i][j][k]);
gree(k_pos->d_weights[i][j][k]);
gree(k_pos->weights[i][j][k], true);
gree(k_pos->d_weights[i][j][k], true);
#ifdef ADAM_CNN_WEIGHTS
gree(k_pos->s_d_weights[i][j][k]);
gree(k_pos->v_d_weights[i][j][k]);
gree(k_pos->s_d_weights[i][j][k], true);
gree(k_pos->v_d_weights[i][j][k], true);
#endif
}
gree(k_pos->weights[i][j]);
gree(k_pos->d_weights[i][j]);
gree(k_pos->weights[i][j], true);
gree(k_pos->d_weights[i][j], true);
#ifdef ADAM_CNN_WEIGHTS
gree(k_pos->s_d_weights[i][j]);
gree(k_pos->v_d_weights[i][j]);
gree(k_pos->s_d_weights[i][j], true);
gree(k_pos->v_d_weights[i][j], true);
#endif
}
gree(k_pos->weights[i]);
gree(k_pos->d_weights[i]);
gree(k_pos->weights[i], true);
gree(k_pos->d_weights[i], true);
#ifdef ADAM_CNN_WEIGHTS
gree(k_pos->s_d_weights[i]);
gree(k_pos->v_d_weights[i]);
gree(k_pos->s_d_weights[i], true);
gree(k_pos->v_d_weights[i], true);
#endif
}
gree(k_pos->weights);
gree(k_pos->d_weights);
gree(k_pos->weights, true);
gree(k_pos->d_weights, true);
#ifdef ADAM_CNN_WEIGHTS
gree(k_pos->s_d_weights);
gree(k_pos->v_d_weights);
gree(k_pos->s_d_weights, true);
gree(k_pos->v_d_weights, true);
#endif
gree(k_pos);
gree(k_pos, true);
}
void free_dense(Network* network, int pos) {
@ -103,28 +104,28 @@ void free_dense(Network* network, int pos) {
Kernel_nn* k_pos = network->kernel[pos]->nn;
int dim = k_pos->size_input;
for (int i=0; i < dim; i++) {
gree(k_pos->weights[i]);
gree(k_pos->d_weights[i]);
gree(k_pos->weights[i], true);
gree(k_pos->d_weights[i], true);
#ifdef ADAM_DENSE_WEIGHTS
gree(k_pos->s_d_weights[i]);
gree(k_pos->v_d_weights[i]);
gree(k_pos->s_d_weights[i], true);
gree(k_pos->v_d_weights[i], true);
#endif
}
gree(k_pos->weights);
gree(k_pos->d_weights);
gree(k_pos->weights, true);
gree(k_pos->d_weights, true);
#ifdef ADAM_DENSE_WEIGHTS
gree(k_pos->s_d_weights);
gree(k_pos->v_d_weights);
gree(k_pos->s_d_weights, true);
gree(k_pos->v_d_weights, true);
#endif
gree(k_pos->bias);
gree(k_pos->d_bias);
gree(k_pos->bias, true);
gree(k_pos->d_bias, true);
#ifdef ADAM_DENSE_BIAS
gree(k_pos->s_d_bias);
gree(k_pos->v_d_bias);
gree(k_pos->s_d_bias, true);
gree(k_pos->v_d_bias, true);
#endif
gree(k_pos);
gree(k_pos, true);
}
void free_dense_linearisation(Network* network, int pos) {
@ -132,28 +133,28 @@ void free_dense_linearisation(Network* network, int pos) {
Kernel_nn* k_pos = network->kernel[pos]->nn;
int dim = k_pos->size_input;
for (int i=0; i < dim; i++) {
gree(k_pos->weights[i]);
gree(k_pos->d_weights[i]);
gree(k_pos->weights[i], true);
gree(k_pos->d_weights[i], true);
#ifdef ADAM_DENSE_WEIGHTS
gree(k_pos->s_d_weights[i]);
gree(k_pos->v_d_weights[i]);
gree(k_pos->s_d_weights[i], true);
gree(k_pos->v_d_weights[i], true);
#endif
}
gree(k_pos->weights);
gree(k_pos->d_weights);
gree(k_pos->weights, true);
gree(k_pos->d_weights, true);
#ifdef ADAM_DENSE_WEIGHTS
gree(k_pos->s_d_weights);
gree(k_pos->v_d_weights);
gree(k_pos->s_d_weights, true);
gree(k_pos->v_d_weights, true);
#endif
gree(k_pos->bias);
gree(k_pos->d_bias);
gree(k_pos->bias, true);
gree(k_pos->d_bias, true);
#ifdef ADAM_DENSE_BIAS
gree(k_pos->s_d_bias);
gree(k_pos->v_d_bias);
gree(k_pos->s_d_bias, true);
gree(k_pos->v_d_bias, true);
#endif
gree(k_pos);
gree(k_pos, true);
}
void free_network_creation(Network* network) {
@ -161,36 +162,44 @@ void free_network_creation(Network* network) {
free_a_cube_input_layer(network, 0, network->depth[0], network->width[0]);
for (int i=0; i < network->max_size-1; i++) {
gree(network->kernel[i]);
gree(network->kernel[i], true);
}
gree(network->width);
gree(network->depth);
gree(network->kernel);
gree(network->input);
gree(network->input_z);
gree(network->width, true);
gree(network->depth, true);
gree(network->kernel, true);
gree(network->input, true);
gree(network->input_z, true);
gree(network);
gree(network, true);
}
/*
 * Release a whole network.
 * When USE_CUDA or TEST_MEMORY_MANAGEMENT is defined, everything is dropped at
 * once through free_all_memory(); otherwise the kernels are walked from
 * index size-2 down to 0 and released with the helper matching their kind,
 * followed by free_network_creation().
 *
 * NOTE(review): this is a diff hunk; lines of the previous, unconditional loop
 * are interleaved with the new #if/#else version.
 */
void free_network(Network* network) {
for (int i=network->size-2; i>=0; i--) {
if (network->kernel[i]->cnn != NULL) {
// Convolution
free_convolution(network, i);
}
else if (network->kernel[i]->nn != NULL) {
// Dense
if (network->kernel[i]->linearisation == DOESNT_LINEARISE) {
// Regular dense layer
free_dense(network, i);
#if defined(USE_CUDA) || defined(TEST_MEMORY_MANAGEMENT)
// Directly release all the memory allocated with nalloc.
// There is then no need to walk the whole network,
// but ALL of the network's memory must have been allocated that way,
// and ONLY the network's memory may have been allocated that way.
free_all_memory();
#else
for (int i=network->size-2; i>=0; i--) {
if (network->kernel[i]->cnn != NULL) {
// Convolution
free_convolution(network, i);
}
else if (network->kernel[i]->nn != NULL) {
// Dense
if (network->kernel[i]->linearisation == DOESNT_LINEARISE) {
// Regular dense layer
free_dense(network, i);
} else {
// Dense layer that linearises
free_dense_linearisation(network, i);
}
} else {
// Dense layer that linearises
free_dense_linearisation(network, i);
// Pooling
free_pooling(network, i);
}
} else {
// Pooling
free_pooling(network, i);
}
}
free_network_creation(network);
free_network_creation(network);
#endif
}

View File

@ -51,7 +51,7 @@ float* test_network_mnist(Network* network, char* images_file, char* labels_file
// Compute loss
wanted_output = generate_wanted_output(labels[i], 10);
loss += compute_mean_squared_error(network->input[network->size-1][0][0], wanted_output, 10);
gree(wanted_output);
gree(wanted_output, false);
for (int j=0; j < height; j++) {
free(images[i][j]);

View File

@ -84,7 +84,7 @@ void* train_thread(void* parameters) {
wanted_output = generate_wanted_output(labels[index[i]], 10);
loss += compute_mean_squared_error(network->input[network->size-1][0][0], wanted_output, 10);
gree(wanted_output);
gree(wanted_output, false);
backward_propagation(network, labels[index[i]]);

View File

@ -64,6 +64,20 @@ void print_memory_rec(Memory* mem);
void print_memory();
#ifdef __CUDACC__
extern "C"
#endif
/*
* Supprime tous les blocs de mémoire
*/
void free_all_memory();
/*
* Fonction récursive correspondante
*/
void free_all_memory_rec(Memory* mem);
/*
* Créer un bloc de mémoire de taille size
*/
@ -76,8 +90,11 @@ void* allocate_memory(int nb_elements, size_t size, Memory* mem);
/*
* Essayer de libérer le pointeur représenté par ptr dans mem
* Si `already_freed`, le programme ne renvoiera pas d'erreur si
* le bloc correspondant à l'élément est déjà libéré
* (dans l'utilisation de `free_all_memory()` par exemple)
*/
Memory* free_memory(void* ptr, Memory* mem);
Memory* free_memory(void* ptr, Memory* mem, bool already_freed);
#ifdef __CUDACC__
extern "C"
@ -92,7 +109,10 @@ extern "C"
#endif
/*
* Libérer le mémoire allouée avec nalloc
* Si `already_freed`, le programme ne renvoiera pas d'erreur si
* le bloc correspondant à l'élément est déjà libéré
* (dans l'utilisation de `free_all_memory()` par exemple)
*/
void gree(void* ptr);
void gree(void* ptr, bool already_freed);
#endif

View File

@ -40,6 +40,7 @@ int get_memory_blocks_number() {
return get_length(memory);
}
void print_memory_rec(Memory* mem) {
if (!mem) {
return;
@ -51,12 +52,43 @@ void print_memory_rec(Memory* mem) {
print_memory_rec(mem->next);
}
void print_memory() {
printf(BLUE "==== MEMORY ====\n" RESET);
print_memory_rec(memory);
}
#ifdef __CUDACC__
extern "C"
#endif
/*
 * Release every block tracked by the allocator in a single pass.
 *
 * The whole block list headed by the global `memory` is handed to
 * free_all_memory_rec(), which frees each block's storage and its list node.
 * The list head (and the tail pointer when MEMORY_TAIL_OPT is defined) is then
 * reset so the allocator starts again from an empty list instead of keeping
 * pointers to freed nodes.
 *
 * The allocator lock is held for the whole operation.
 */
void free_all_memory() {
    pthread_mutex_lock(&memory_lock); // We don't want ANY interruption so we lock here

    free_all_memory_rec(memory);
    // Every node of the list has just been freed: without this reset the head
    // would dangle and any later list walk would touch freed memory.
    memory = NULL;
#ifdef MEMORY_TAIL_OPT
    tail = NULL;
#endif

    pthread_mutex_unlock(&memory_lock);
}
/*
 * Free the block list starting at `mem`.
 * For each node, the successor is saved first, then the node's storage
 * (`start`) is released with cudaFree under __CUDACC__ or free otherwise,
 * and finally the node itself is freed. A NULL list is a no-op.
 */
void free_all_memory_rec(Memory* mem) {
    while (mem != NULL) {
        // Read the link before the node is destroyed.
        Memory* following = mem->next;

#ifdef __CUDACC__
        cudaFree(mem->start);
#else
        free(mem->start);
#endif
        free(mem);

        mem = following;
    }
}
Memory* create_memory_block(size_t size) {
Memory* mem = (Memory*)malloc(sizeof(Memory));
#ifdef __CUDACC__
@ -112,8 +144,8 @@ void* allocate_memory(int nb_elements, size_t size, Memory* mem) {
}
Memory* free_memory(void* ptr, Memory* mem) {
if (!mem) {
Memory* free_memory(void* ptr, Memory* mem, bool already_freed) {
if (!mem && !already_freed) {
printf_error((char*)"Le pointeur ");
printf("%p a déjà été libéré ou n'a jamais été alloué\n", ptr);
return mem;
@ -141,7 +173,7 @@ Memory* free_memory(void* ptr, Memory* mem) {
return mem;
}
} else {
mem->next = free_memory(ptr, mem->next);
mem->next = free_memory(ptr, mem->next, already_freed);
return mem;
}
}
@ -176,10 +208,10 @@ void* nalloc(int nb_elements, size_t size) {
#ifdef __CUDACC__
extern "C"
#endif
void gree(void* ptr) {
void gree(void* ptr, bool already_freed) {
#if defined(__CUDACC__) || defined(TEST_MEMORY_MANAGEMENT)
pthread_mutex_lock(&memory_lock);
memory = free_memory(ptr, memory);
memory = free_memory(ptr, memory, already_freed);
pthread_mutex_unlock(&memory_lock);
#else
free(ptr);

View File

@ -40,6 +40,7 @@ int get_memory_blocks_number() {
return get_length(memory);
}
void print_memory_rec(Memory* mem) {
if (!mem) {
return;
@ -51,12 +52,43 @@ void print_memory_rec(Memory* mem) {
print_memory_rec(mem->next);
}
void print_memory() {
printf(BLUE "==== MEMORY ====\n" RESET);
print_memory_rec(memory);
}
#ifdef __CUDACC__
extern "C"
#endif
/*
 * Release every block tracked by the allocator in a single pass.
 *
 * The whole block list headed by the global `memory` is handed to
 * free_all_memory_rec(), which frees each block's storage and its list node.
 * The list head (and the tail pointer when MEMORY_TAIL_OPT is defined) is then
 * reset so the allocator starts again from an empty list instead of keeping
 * pointers to freed nodes.
 *
 * The allocator lock is held for the whole operation.
 */
void free_all_memory() {
    pthread_mutex_lock(&memory_lock); // We don't want ANY interruption so we lock here

    free_all_memory_rec(memory);
    // Every node of the list has just been freed: without this reset the head
    // would dangle and any later list walk would touch freed memory.
    memory = NULL;
#ifdef MEMORY_TAIL_OPT
    tail = NULL;
#endif

    pthread_mutex_unlock(&memory_lock);
}
/*
 * Free the block list starting at `mem`.
 * For each node, the successor is saved first, then the node's storage
 * (`start`) is released with cudaFree under __CUDACC__ or free otherwise,
 * and finally the node itself is freed. A NULL list is a no-op.
 */
void free_all_memory_rec(Memory* mem) {
    while (mem != NULL) {
        // Read the link before the node is destroyed.
        Memory* following = mem->next;

#ifdef __CUDACC__
        cudaFree(mem->start);
#else
        free(mem->start);
#endif
        free(mem);

        mem = following;
    }
}
Memory* create_memory_block(size_t size) {
Memory* mem = (Memory*)malloc(sizeof(Memory));
#ifdef __CUDACC__
@ -112,8 +144,8 @@ void* allocate_memory(int nb_elements, size_t size, Memory* mem) {
}
Memory* free_memory(void* ptr, Memory* mem) {
if (!mem) {
Memory* free_memory(void* ptr, Memory* mem, bool already_freed) {
if (!mem && !already_freed) {
printf_error((char*)"Le pointeur ");
printf("%p a déjà été libéré ou n'a jamais été alloué\n", ptr);
return mem;
@ -141,7 +173,7 @@ Memory* free_memory(void* ptr, Memory* mem) {
return mem;
}
} else {
mem->next = free_memory(ptr, mem->next);
mem->next = free_memory(ptr, mem->next, already_freed);
return mem;
}
}
@ -176,10 +208,10 @@ void* nalloc(int nb_elements, size_t size) {
#ifdef __CUDACC__
extern "C"
#endif
void gree(void* ptr) {
void gree(void* ptr, bool already_freed) {
#if defined(__CUDACC__) || defined(TEST_MEMORY_MANAGEMENT)
pthread_mutex_lock(&memory_lock);
memory = free_memory(ptr, memory);
memory = free_memory(ptr, memory, already_freed);
pthread_mutex_unlock(&memory_lock);
#else
free(ptr);

View File

@ -72,11 +72,11 @@ float*** create_empty_matrix(int n, int p, int q) {
/*
 * Release a three-level matrix: for each of the `n` first-level entries, the
 * `p` innermost rows are released, then the entry, and finally `matrix` itself.
 *
 * NOTE(review): diff hunk: the one-argument gree(...) lines are the pre-change
 * calls, the gree(..., false) lines are their replacements.
 */
void free_matrix(float*** matrix, int n, int p) {
for (int i=0; i < n; i++) {
for (int j=0; j < p; j++) {
gree(matrix[i][j]);
gree(matrix[i][j], false);
}
gree(matrix[i]);
gree(matrix[i], false);
}
gree(matrix);
gree(matrix, false);
}
bool check_matrices_equality(float*** m1, float*** m2, int n, int p, int q, int acceptation) {
@ -177,11 +177,11 @@ void run_convolution_test(int input_width, int output_width, int rows, int colum
free_matrix(kernel->v_d_weights[i], kernel->columns, kernel->k_size);
#endif
}
gree(kernel->weights);
gree(kernel->d_weights);
gree(kernel->weights, false);
gree(kernel->d_weights, false);
#ifdef ADAM_CNN_WEIGHTS
gree(kernel->s_d_weights);
gree(kernel->v_d_weights);
gree(kernel->s_d_weights, false);
gree(kernel->v_d_weights, false);
#endif
free_matrix(input, kernel->rows, input_width);

View File

@ -76,13 +76,13 @@ void test1(int activation, bool use_local_kernel) {
exit(1);
}
}
gree(input[i][j]);
gree(input[i][j], false);
free(input_initial[i][j]);
}
gree(input[i]);
gree(input[i], false);
free(input_initial[i]);
}
gree(input);
gree(input, false);
free(input_initial);
printf("\t" GREEN "OK\n" RESET);

View File

@ -104,24 +104,24 @@ void run_matrices_test(int n, int p, int q) {
// On libère l'espace mémoire alloué
for (int i=0; i < n; i++) {
gree(matrix1[i]);
gree(matrix1[i], false);
}
gree(matrix1);
gree(matrix1, false);
for (int i=0; i < p; i++) {
gree(matrix2[i]);
gree(matrix2[i], false);
}
gree(matrix2);
gree(matrix2, false);
for (int i=0; i < n; i++) {
gree(result_cpu[i]);
gree(result_cpu[i], false);
}
gree(result_cpu);
gree(result_cpu, false);
for (int i=0; i < n; i++) {
gree(result_gpu[i]);
gree(result_gpu[i], false);
}
gree(result_gpu);
gree(result_gpu, false);
}

View File

@ -28,7 +28,7 @@ int main() {
printf_error((char*)"Plus d'un élément de mémoire alloué en une seule allocation\n");
exit(1);
}
gree(ptr);
gree(ptr, false);
if (! (get_memory_blocks_number() == blocks_used)) {
printf_error((char*)"La mémoire n'a pas été libérée correctement\n");
exit(1);
@ -56,11 +56,11 @@ int main() {
// We test that the memory does not overlap itself
assert(pointeurs[i][j] == i);
}
gree(pointeurs[i]);
gree(pointeurs[i], false);
}
gree(ptr1);
gree(ptr2);
gree(ptr1, false);
gree(ptr2, false);
if (! (get_memory_distinct_allocations() == 0 && get_memory_blocks_number() == 0)) {
printf_error((char*)"La mémoire n'a pas été libérée correctement\n");
exit(1);

View File

@ -45,7 +45,7 @@ int main() {
printf("Plus d'un élément de mémoire alloué en une seule allocation\n");
exit(1);
}
gree(ptr);
gree(ptr, false);
if (! (get_memory_blocks_number() == blocks_used)) {
printf("La mémoire n'a pas été libérée correctement\n");
exit(1);
@ -86,11 +86,11 @@ int main() {
// We test that the memory does not overlap itself
assert(pointeurs[i][j] == i+1);
}
gree(pointeurs[i]);
gree(pointeurs[i], false);
}
gree(ptr1);
gree(ptr2);
gree(ptr1, false);
gree(ptr2, false);
if (! (get_memory_distinct_allocations() == 0 && get_memory_blocks_number() == 0)) {
printf("La mémoire n'a pas été libérée correctement\n");
exit(1);