diff --git a/src/cnn/cnn.c b/src/cnn/cnn.c
index 43ad751..b0de52d 100644
--- a/src/cnn/cnn.c
+++ b/src/cnn/cnn.c
@@ -234,7 +234,7 @@ void backward_propagation(Network* network, int wanted_number) {
     // Backward sur la dernière couche qui utilise toujours SOFTMAX
     float* wanted_output = generate_wanted_output(wanted_number, network->width[network->size -1]); // Sortie désirée, permet d'initialiser une erreur
     softmax_backward_cross_entropy(network->input[n-1][0][0], wanted_output, network->width[n-1]);
-    gree(wanted_output);
+    gree(wanted_output, false);
 
     /*
     * On propage à chaque étape:
diff --git a/src/cnn/free.c b/src/cnn/free.c
index 18b64ce..9685dc4 100644
--- a/src/cnn/free.c
+++ b/src/cnn/free.c
@@ -1,3 +1,4 @@
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 
@@ -8,25 +9,25 @@
 void free_a_cube_input_layer(Network* network, int pos, int depth, int dim) {
     for (int i=0; i < depth; i++) {
         for (int j=0; j < dim; j++) {
-            gree(network->input[pos][i][j]);
-            gree(network->input_z[pos][i][j]);
+            gree(network->input[pos][i][j], true);
+            gree(network->input_z[pos][i][j], true);
         }
-        gree(network->input[pos][i]);
-        gree(network->input_z[pos][i]);
+        gree(network->input[pos][i], true);
+        gree(network->input_z[pos][i], true);
     }
-    gree(network->input[pos]);
-    gree(network->input_z[pos]);
+    gree(network->input[pos], true);
+    gree(network->input_z[pos], true);
 }
 
 void free_a_line_input_layer(Network* network, int pos) {
     // Libère l'espace mémoire de network->input[pos] et network->input_z[pos]
     // lorsque ces couches sont denses (donc sont des matrice de dimension 1)
-    gree(network->input[pos][0][0]);
-    gree(network->input_z[pos][0][0]);
-    gree(network->input[pos][0]);
-    gree(network->input_z[pos][0]);
-    gree(network->input[pos]);
-    gree(network->input_z[pos]);
+    gree(network->input[pos][0][0], true);
+    gree(network->input_z[pos][0][0], true);
+    gree(network->input[pos][0], true);
+    gree(network->input_z[pos][0], true);
+    gree(network->input[pos], true);
+    gree(network->input_z[pos], true);
 }
 
 void free_pooling(Network* network, int pos) {
@@ -43,59 +44,59 @@ void free_convolution(Network* network, int pos) {
     free_a_cube_input_layer(network, pos+1, network->depth[pos+1], network->width[pos+1]);
     for (int i=0; i < c; i++) {
         for (int j=0; j < bias_size; j++) {
-            gree(k_pos->bias[i][j]);
-            gree(k_pos->d_bias[i][j]);
+            gree(k_pos->bias[i][j], true);
+            gree(k_pos->d_bias[i][j], true);
             #ifdef ADAM_CNN_BIAS
-            gree(k_pos->s_d_bias[i][j]);
-            gree(k_pos->v_d_bias[i][j]);
+            gree(k_pos->s_d_bias[i][j], true);
+            gree(k_pos->v_d_bias[i][j], true);
             #endif
         }
-        gree(k_pos->bias[i]);
-        gree(k_pos->d_bias[i]);
+        gree(k_pos->bias[i], true);
+        gree(k_pos->d_bias[i], true);
         #ifdef ADAM_CNN_BIAS
-        gree(k_pos->s_d_bias[i]);
-        gree(k_pos->v_d_bias[i]);
+        gree(k_pos->s_d_bias[i], true);
+        gree(k_pos->v_d_bias[i], true);
         #endif
     }
-    gree(k_pos->bias);
-    gree(k_pos->d_bias);
+    gree(k_pos->bias, true);
+    gree(k_pos->d_bias, true);
     #ifdef ADAM_CNN_BIAS
-    gree(k_pos->s_d_bias);
-    gree(k_pos->v_d_bias);
+    gree(k_pos->s_d_bias, true);
+    gree(k_pos->v_d_bias, true);
    #endif
 
     for (int i=0; i < r; i++) {
         for (int j=0; j < c; j++) {
             for (int k=0; k < k_size; k++) {
-                gree(k_pos->weights[i][j][k]);
-                gree(k_pos->d_weights[i][j][k]);
+                gree(k_pos->weights[i][j][k], true);
+                gree(k_pos->d_weights[i][j][k], true);
                 #ifdef ADAM_CNN_WEIGHTS
-                gree(k_pos->s_d_weights[i][j][k]);
-                gree(k_pos->v_d_weights[i][j][k]);
+                gree(k_pos->s_d_weights[i][j][k], true);
+                gree(k_pos->v_d_weights[i][j][k], true);
                 #endif
             }
-            gree(k_pos->weights[i][j]);
-            gree(k_pos->d_weights[i][j]);
+            gree(k_pos->weights[i][j], true);
+            gree(k_pos->d_weights[i][j], true);
             #ifdef ADAM_CNN_WEIGHTS
-            gree(k_pos->s_d_weights[i][j]);
-            gree(k_pos->v_d_weights[i][j]);
+            gree(k_pos->s_d_weights[i][j], true);
+            gree(k_pos->v_d_weights[i][j], true);
             #endif
         }
-        gree(k_pos->weights[i]);
-        gree(k_pos->d_weights[i]);
+        gree(k_pos->weights[i], true);
+        gree(k_pos->d_weights[i], true);
         #ifdef ADAM_CNN_WEIGHTS
-        gree(k_pos->s_d_weights[i]);
-        gree(k_pos->v_d_weights[i]);
+        gree(k_pos->s_d_weights[i], true);
+        gree(k_pos->v_d_weights[i], true);
         #endif
     }
-    gree(k_pos->weights);
-    gree(k_pos->d_weights);
+    gree(k_pos->weights, true);
+    gree(k_pos->d_weights, true);
     #ifdef ADAM_CNN_WEIGHTS
-    gree(k_pos->s_d_weights);
-    gree(k_pos->v_d_weights);
+    gree(k_pos->s_d_weights, true);
+    gree(k_pos->v_d_weights, true);
     #endif
 
-    gree(k_pos);
+    gree(k_pos, true);
 }
 
 void free_dense(Network* network, int pos) {
@@ -103,28 +104,28 @@ void free_dense(Network* network, int pos) {
     Kernel_nn* k_pos = network->kernel[pos]->nn;
     int dim = k_pos->size_input;
     for (int i=0; i < dim; i++) {
-        gree(k_pos->weights[i]);
-        gree(k_pos->d_weights[i]);
+        gree(k_pos->weights[i], true);
+        gree(k_pos->d_weights[i], true);
         #ifdef ADAM_DENSE_WEIGHTS
-        gree(k_pos->s_d_weights[i]);
-        gree(k_pos->v_d_weights[i]);
+        gree(k_pos->s_d_weights[i], true);
+        gree(k_pos->v_d_weights[i], true);
         #endif
     }
-    gree(k_pos->weights);
-    gree(k_pos->d_weights);
+    gree(k_pos->weights, true);
+    gree(k_pos->d_weights, true);
    #ifdef ADAM_DENSE_WEIGHTS
-    gree(k_pos->s_d_weights);
-    gree(k_pos->v_d_weights);
+    gree(k_pos->s_d_weights, true);
+    gree(k_pos->v_d_weights, true);
    #endif
 
-    gree(k_pos->bias);
-    gree(k_pos->d_bias);
+    gree(k_pos->bias, true);
+    gree(k_pos->d_bias, true);
     #ifdef ADAM_DENSE_BIAS
-    gree(k_pos->s_d_bias);
-    gree(k_pos->v_d_bias);
+    gree(k_pos->s_d_bias, true);
+    gree(k_pos->v_d_bias, true);
     #endif
 
-    gree(k_pos);
+    gree(k_pos, true);
 }
 
 void free_dense_linearisation(Network* network, int pos) {
@@ -132,28 +133,28 @@ void free_dense_linearisation(Network* network, int pos) {
     Kernel_nn* k_pos = network->kernel[pos]->nn;
     int dim = k_pos->size_input;
     for (int i=0; i < dim; i++) {
-        gree(k_pos->weights[i]);
-        gree(k_pos->d_weights[i]);
+        gree(k_pos->weights[i], true);
+        gree(k_pos->d_weights[i], true);
         #ifdef ADAM_DENSE_WEIGHTS
-        gree(k_pos->s_d_weights[i]);
-        gree(k_pos->v_d_weights[i]);
+        gree(k_pos->s_d_weights[i], true);
+        gree(k_pos->v_d_weights[i], true);
         #endif
     }
-    gree(k_pos->weights);
-    gree(k_pos->d_weights);
+    gree(k_pos->weights, true);
+    gree(k_pos->d_weights, true);
     #ifdef ADAM_DENSE_WEIGHTS
-    gree(k_pos->s_d_weights);
-    gree(k_pos->v_d_weights);
+    gree(k_pos->s_d_weights, true);
+    gree(k_pos->v_d_weights, true);
     #endif
 
-    gree(k_pos->bias);
-    gree(k_pos->d_bias);
+    gree(k_pos->bias, true);
+    gree(k_pos->d_bias, true);
     #ifdef ADAM_DENSE_BIAS
-    gree(k_pos->s_d_bias);
-    gree(k_pos->v_d_bias);
+    gree(k_pos->s_d_bias, true);
+    gree(k_pos->v_d_bias, true);
     #endif
 
-    gree(k_pos);
+    gree(k_pos, true);
 }
 
 void free_network_creation(Network* network) {
@@ -161,36 +162,44 @@ void free_network_creation(Network* network) {
     free_a_cube_input_layer(network, 0, network->depth[0], network->width[0]);
     for (int i=0; i < network->max_size-1; i++) {
-        gree(network->kernel[i]);
+        gree(network->kernel[i], true);
     }
 
-    gree(network->width);
-    gree(network->depth);
-    gree(network->kernel);
-    gree(network->input);
-    gree(network->input_z);
+    gree(network->width, true);
+    gree(network->depth, true);
+    gree(network->kernel, true);
+    gree(network->input, true);
+    gree(network->input_z, true);
 
-    gree(network);
+    gree(network, true);
 }
 
 void free_network(Network* network) {
-    for (int i=network->size-2; i>=0; i--) {
-        if (network->kernel[i]->cnn != NULL) {
-            // Convolution
-            free_convolution(network, i);
-        }
-        else if (network->kernel[i]->nn != NULL) {
-            // Dense
-            if (network->kernel[i]->linearisation == DOESNT_LINEARISE) {
-                // Dense normale
-                free_dense(network, i);
+    #if defined(USE_CUDA) || defined(TEST_MEMORY_MANAGEMENT)
+        // Supprimer toute la mémoire allouée avec nalloc directement
+        // Il n'y a alors plus besoin de parcourir tout le réseau,
+        // mais il faut que TOUTE la mémoire du réseau ait été allouée de cette manière
+        // et que cela soit le cas UNIQUEMENT pour la mémoire allouée au réseau
+        free_all_memory();
+    #else
+        for (int i=network->size-2; i>=0; i--) {
+            if (network->kernel[i]->cnn != NULL) {
+                // Convolution
+                free_convolution(network, i);
+            }
+            else if (network->kernel[i]->nn != NULL) {
+                // Dense
+                if (network->kernel[i]->linearisation == DOESNT_LINEARISE) {
+                    // Dense normale
+                    free_dense(network, i);
+                } else {
+                    // Dense qui linéarise
+                    free_dense_linearisation(network, i);
+                }
             } else {
-                // Dense qui linéarise
-                free_dense_linearisation(network, i);
+                // Pooling
+                free_pooling(network, i);
             }
-        } else {
-            // Pooling
-            free_pooling(network, i);
         }
-    }
-    free_network_creation(network);
+        free_network_creation(network);
+    #endif
 }
diff --git a/src/cnn/test_network.c b/src/cnn/test_network.c
index 92351ae..0c15f4c 100644
--- a/src/cnn/test_network.c
+++ b/src/cnn/test_network.c
@@ -51,7 +51,7 @@ float* test_network_mnist(Network* network, char* images_file, char* labels_file
         // Compute loss
         wanted_output = generate_wanted_output(labels[i], 10);
         loss += compute_mean_squared_error(network->input[network->size-1][0][0], wanted_output, 10);
-        gree(wanted_output);
+        gree(wanted_output, false);
 
         for (int j=0; j < height; j++) {
             free(images[i][j]);
diff --git a/src/cnn/train.c b/src/cnn/train.c
index 9f58b70..c51ed7a 100644
--- a/src/cnn/train.c
+++ b/src/cnn/train.c
@@ -84,7 +84,7 @@ void* train_thread(void* parameters) {
 
         wanted_output = generate_wanted_output(labels[index[i]], 10);
         loss += compute_mean_squared_error(network->input[network->size-1][0][0], wanted_output, 10);
-        gree(wanted_output);
+        gree(wanted_output, false);
 
         backward_propagation(network, labels[index[i]]);
 
diff --git a/src/common/include/memory_management.h b/src/common/include/memory_management.h
index 6f28cce..40032d1 100644
--- a/src/common/include/memory_management.h
+++ b/src/common/include/memory_management.h
@@ -64,6 +64,20 @@
 void print_memory_rec(Memory* mem);
 void print_memory();
 
+#ifdef __CUDACC__
+extern "C"
+#endif
+/*
+* Supprime tous les blocs de mémoire
+*/
+void free_all_memory();
+
+/*
+* Fonction récursive correspondante
+*/
+void free_all_memory_rec(Memory* mem);
+
+
 /*
 * Créer un bloc de mémoire de taille size
 */
@@ -76,8 +90,11 @@ void* allocate_memory(int nb_elements, size_t size, Memory* mem);
 /*
 * Essayer de libérer le pointeur représenté par ptr dans mem
+* Si `already_freed`, le programme ne renvoiera pas d'erreur si
+* le bloc correspondant à l'élément est déjà libéré
+* (dans l'utilisation de `free_all_memory()` par exemple)
 */
-Memory* free_memory(void* ptr, Memory* mem);
+Memory* free_memory(void* ptr, Memory* mem, bool already_freed);
 
 #ifdef __CUDACC__
 extern "C"
 #endif
@@ -92,7 +109,10 @@ extern "C"
 #endif
 /*
 * Libérer le mémoire allouée avec nalloc
+* Si `already_freed`, le programme ne renvoiera pas d'erreur si
+* le bloc correspondant à l'élément est déjà libéré
+* (dans l'utilisation de `free_all_memory()` par exemple)
 */
-void gree(void* ptr);
+void gree(void* ptr, bool already_freed);
 
 #endif
\ No newline at end of file
diff --git a/src/common/memory_management.c b/src/common/memory_management.c
index 5a403dc..76216f7 100644
--- a/src/common/memory_management.c
+++ b/src/common/memory_management.c
@@ -40,6 +40,7 @@ int get_memory_blocks_number() {
     return get_length(memory);
 }
 
+
 void print_memory_rec(Memory* mem) {
     if (!mem) {
         return;
@@ -51,12 +52,43 @@ void print_memory_rec(Memory* mem) {
     print_memory_rec(mem->next);
 }
 
-
 void print_memory() {
     printf(BLUE "==== MEMORY ====\n" RESET);
     print_memory_rec(memory);
 }
 
+
+#ifdef __CUDACC__
+extern "C"
+#endif
+void free_all_memory() {
+    pthread_mutex_lock(&memory_lock); // We don't want ANY interruption so we lock here
+
+    free_all_memory_rec(memory);
+    #ifdef MEMORY_TAIL_OPT
+    tail = NULL;
+    #endif
+
+    pthread_mutex_unlock(&memory_lock);
+}
+
+void free_all_memory_rec(Memory* mem) {
+    if (!mem) {
+        return;
+    }
+    Memory* next = mem->next;
+
+    #ifdef __CUDACC__
+    cudaFree(mem->start);
+    #else
+    free(mem->start);
+    #endif
+    free(mem);
+
+    free_all_memory_rec(next);
+}
+
+
 Memory* create_memory_block(size_t size) {
     Memory* mem = (Memory*)malloc(sizeof(Memory));
     #ifdef __CUDACC__
@@ -112,8 +144,8 @@ void* allocate_memory(int nb_elements, size_t size, Memory* mem) {
 }
 
 
-Memory* free_memory(void* ptr, Memory* mem) {
-    if (!mem) {
+Memory* free_memory(void* ptr, Memory* mem, bool already_freed) {
+    if (!mem && !already_freed) {
         printf_error((char*)"Le pointeur ");
         printf("%p a déjà été libéré ou n'a jamais été alloué\n", ptr);
         return mem;
@@ -141,7 +173,7 @@ Memory* free_memory(void* ptr, Memory* mem) {
             return mem;
         }
     } else {
-        mem->next = free_memory(ptr, mem->next);
+        mem->next = free_memory(ptr, mem->next, already_freed);
         return mem;
     }
 }
@@ -176,10 +208,10 @@ void* nalloc(int nb_elements, size_t size) {
 #ifdef __CUDACC__
 extern "C"
 #endif
-void gree(void* ptr) {
+void gree(void* ptr, bool already_freed) {
     #if defined(__CUDACC__) || defined(TEST_MEMORY_MANAGEMENT)
     pthread_mutex_lock(&memory_lock);
-    memory = free_memory(ptr, memory);
+    memory = free_memory(ptr, memory, already_freed);
     pthread_mutex_unlock(&memory_lock);
     #else
     free(ptr);
diff --git a/src/common/memory_management.cu b/src/common/memory_management.cu
index 5a403dc..76216f7 100644
--- a/src/common/memory_management.cu
+++ b/src/common/memory_management.cu
@@ -40,6 +40,7 @@ int get_memory_blocks_number() {
     return get_length(memory);
 }
 
+
 void print_memory_rec(Memory* mem) {
     if (!mem) {
         return;
@@ -51,12 +52,43 @@ void print_memory_rec(Memory* mem) {
     print_memory_rec(mem->next);
 }
 
-
 void print_memory() {
     printf(BLUE "==== MEMORY ====\n" RESET);
     print_memory_rec(memory);
 }
 
+
+#ifdef __CUDACC__
+extern "C"
+#endif
+void free_all_memory() {
+    pthread_mutex_lock(&memory_lock); // We don't want ANY interruption so we lock here
+
+    free_all_memory_rec(memory);
+    #ifdef MEMORY_TAIL_OPT
+    tail = NULL;
+    #endif
+
+    pthread_mutex_unlock(&memory_lock);
+}
+
+void free_all_memory_rec(Memory* mem) {
+    if (!mem) {
+        return;
+    }
+    Memory* next = mem->next;
+
+    #ifdef __CUDACC__
+    cudaFree(mem->start);
+    #else
+    free(mem->start);
+    #endif
+    free(mem);
+
+    free_all_memory_rec(next);
+}
+
+
 Memory* create_memory_block(size_t size) {
     Memory* mem = (Memory*)malloc(sizeof(Memory));
     #ifdef __CUDACC__
@@ -112,8 +144,8 @@ void* allocate_memory(int nb_elements, size_t size, Memory* mem) {
 }
 
 
-Memory* free_memory(void* ptr, Memory* mem) {
-    if (!mem) {
+Memory* free_memory(void* ptr, Memory* mem, bool already_freed) {
+    if (!mem && !already_freed) {
         printf_error((char*)"Le pointeur ");
         printf("%p a déjà été libéré ou n'a jamais été alloué\n", ptr);
         return mem;
@@ -141,7 +173,7 @@ Memory* free_memory(void* ptr, Memory* mem) {
             return mem;
         }
     } else {
-        mem->next = free_memory(ptr, mem->next);
+        mem->next = free_memory(ptr, mem->next, already_freed);
         return mem;
     }
 }
@@ -176,10 +208,10 @@ void* nalloc(int nb_elements, size_t size) {
 #ifdef __CUDACC__
 extern "C"
 #endif
-void gree(void* ptr) {
+void gree(void* ptr, bool already_freed) {
     #if defined(__CUDACC__) || defined(TEST_MEMORY_MANAGEMENT)
     pthread_mutex_lock(&memory_lock);
-    memory = free_memory(ptr, memory);
+    memory = free_memory(ptr, memory, already_freed);
     pthread_mutex_unlock(&memory_lock);
     #else
     free(ptr);
diff --git a/test/cnn_convolution.cu b/test/cnn_convolution.cu
index 9ca2b25..d7765de 100644
--- a/test/cnn_convolution.cu
+++ b/test/cnn_convolution.cu
@@ -72,11 +72,11 @@ float*** create_empty_matrix(int n, int p, int q) {
 void free_matrix(float*** matrix, int n, int p) {
     for (int i=0; i < n; i++) {
         for (int j=0; j < p; j++) {
-            gree(matrix[i][j]);
+            gree(matrix[i][j], false);
         }
-        gree(matrix[i]);
+        gree(matrix[i], false);
     }
-    gree(matrix);
+    gree(matrix, false);
 }
 
 bool check_matrices_equality(float*** m1, float*** m2, int n, int p, int q, int acceptation) {
@@ -177,11 +177,11 @@ void run_convolution_test(int input_width, int output_width, int rows, int colum
         free_matrix(kernel->v_d_weights[i], kernel->columns, kernel->k_size);
         #endif
     }
-    gree(kernel->weights);
-    gree(kernel->d_weights);
+    gree(kernel->weights, false);
+    gree(kernel->d_weights, false);
     #ifdef ADAM_CNN_WEIGHTS
-    gree(kernel->s_d_weights);
-    gree(kernel->v_d_weights);
+    gree(kernel->s_d_weights, false);
+    gree(kernel->v_d_weights, false);
     #endif
 
     free_matrix(input, kernel->rows, input_width);
diff --git a/test/cnn_function.cu b/test/cnn_function.cu
index 09a69bd..4d191f8 100644
--- a/test/cnn_function.cu
+++ b/test/cnn_function.cu
@@ -76,13 +76,13 @@ void test1(int activation, bool use_local_kernel) {
                     exit(1);
                 }
             }
-            gree(input[i][j]);
+            gree(input[i][j], false);
             free(input_initial[i][j]);
         }
-        gree(input[i]);
+        gree(input[i], false);
         free(input_initial[i]);
     }
-    gree(input);
+    gree(input, false);
     free(input_initial);
 
     printf("\t" GREEN "OK\n" RESET);
diff --git a/test/cnn_matrix_multiplication.cu b/test/cnn_matrix_multiplication.cu
index 6930e84..e00a260 100644
--- a/test/cnn_matrix_multiplication.cu
+++ b/test/cnn_matrix_multiplication.cu
@@ -104,24 +104,24 @@ void run_matrices_test(int n, int p, int q) {
 
     // On libère l'espace mémoire alloué
     for (int i=0; i < n; i++) {
-        gree(matrix1[i]);
+        gree(matrix1[i], false);
     }
-    gree(matrix1);
+    gree(matrix1, false);
 
     for (int i=0; i < p; i++) {
-        gree(matrix2[i]);
+        gree(matrix2[i], false);
     }
-    gree(matrix2);
+    gree(matrix2, false);
 
     for (int i=0; i < n; i++) {
-        gree(result_cpu[i]);
+        gree(result_cpu[i], false);
     }
-    gree(result_cpu);
+    gree(result_cpu, false);
 
     for (int i=0; i < n; i++) {
-        gree(result_gpu[i]);
+        gree(result_gpu[i], false);
     }
-    gree(result_gpu);
+    gree(result_gpu, false);
 
 }
 
diff --git a/test/memory_management.c b/test/memory_management.c
index 7524393..5a086f2 100644
--- a/test/memory_management.c
+++ b/test/memory_management.c
@@ -28,7 +28,7 @@ int main() {
         printf_error((char*)"Plus d'un élément de mémoire alloué en une seule allocation\n");
         exit(1);
     }
-    gree(ptr);
+    gree(ptr, false);
     if (! (get_memory_blocks_number() == blocks_used)) {
         printf_error((char*)"La mémoire n'a pas été libérée correctement\n");
         exit(1);
@@ -56,11 +56,11 @@ int main() {
            // We test that the memory does not overlap itself
            assert(pointeurs[i][j] == i);
        }
-        gree(pointeurs[i]);
+        gree(pointeurs[i], false);
    }
-    gree(ptr1);
-    gree(ptr2);
+    gree(ptr1, false);
+    gree(ptr2, false);
 
     if (! (get_memory_distinct_allocations() == 0 && get_memory_blocks_number() == 0)) {
         printf_error((char*)"La mémoire n'a pas été libérée correctement\n");
         exit(1);
diff --git a/test/memory_management.cu b/test/memory_management.cu
index 29ea299..a575a55 100644
--- a/test/memory_management.cu
+++ b/test/memory_management.cu
@@ -45,7 +45,7 @@ int main() {
         printf("Plus d'un élément de mémoire alloué en une seule allocation\n");
         exit(1);
     }
-    gree(ptr);
+    gree(ptr, false);
     if (! (get_memory_blocks_number() == blocks_used)) {
         printf("La mémoire n'a pas été libérée correctement\n");
         exit(1);
@@ -86,11 +86,11 @@ int main() {
            // We test that the memory does not overlap itself
            assert(pointeurs[i][j] == i+1);
        }
-        gree(pointeurs[i]);
+        gree(pointeurs[i], false);
    }
-    gree(ptr1);
-    gree(ptr2);
+    gree(ptr1, false);
+    gree(ptr2, false);
 
     if (! (get_memory_distinct_allocations() == 0 && get_memory_blocks_number() == 0)) {
         printf("La mémoire n'a pas été libérée correctement\n");
         exit(1);
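
Taken together, the patch changes the contract of gree(): every call site must now state whether the block it frees may already have been released. Below is a minimal usage sketch of the new API, assuming the declarations from src/common/include/memory_management.h above; the standalone main() and the flat include path are illustrative, not part of the patch.

#include <stdbool.h>

#include "memory_management.h" // nalloc(), gree(), free_all_memory()

int main() {
    // Scope-local buffer, as in train.c and test_network.c: the block is
    // known to be live, so a failed lookup in the managed-memory build
    // would be a real bug; already_freed = false keeps the error message.
    float* wanted_output = (float*)nalloc(10, sizeof(float));
    gree(wanted_output, false);

    // Network-owned block, as in free.c: already_freed = true, because in
    // the USE_CUDA / TEST_MEMORY_MANAGEMENT builds the whole pool may
    // already have been dropped in one pass by free_all_memory(), and the
    // per-pointer lookup must then stay silent instead of reporting
    // "a déjà été libéré ou n'a jamais été alloué".
    float* layer = (float*)nalloc(256, sizeof(float));
    gree(layer, true);

    return 0;
}

Note the design choice visible in free_network(): under USE_CUDA or TEST_MEMORY_MANAGEMENT it no longer walks the layers at all and simply calls free_all_memory(), which is why the patch's comment insists that network memory be allocated exclusively through nalloc().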