diff --git a/src/cnn/train.c b/src/cnn/train.c index 3e3dd80..7b858e9 100644 --- a/src/cnn/train.c +++ b/src/cnn/train.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "../mnist/include/mnist.h" #include "include/initialisation.h" @@ -92,6 +93,13 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di jpegDataset* dataset; // Structure de données décrivant un dataset d'images jpeg int* shuffle_index; // shuffle_index[i] contient le nouvel index de l'élément à l'emplacement i avant mélange + double start_time, end_time; + double elapsed_time; + + double algo_start = omp_get_wtime(); + + start_time = omp_get_wtime(); + if (dataset_type == 0) { // Type MNIST // Chargement des images du set de données MNIST int* parameters = read_mnist_images_parameters(images_file); @@ -113,7 +121,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di // Initialisation du réseau if (!recover) { - network = create_network_lenet5(1, 0, TANH, GLOROT, input_dim, input_depth); + network = create_network_lenet5(0.1, 0, TANH, GLOROT, input_dim, input_depth); } else { network = read_network(recover); } @@ -179,8 +187,14 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di train_params->nb_images = BATCHES; train_params->index = shuffle_index; #endif + end_time = omp_get_wtime(); + + elapsed_time = end_time - start_time; + printf("Initialisation: %0.2lf s\n\n", elapsed_time); for (int i=0; i < epochs; i++) { + + start_time = omp_get_wtime(); // La variable accuracy permet d'avoir une ESTIMATION // du taux de réussite et de l'entraînement du réseau, // mais n'est en aucun cas une valeur réelle dans le cas @@ -213,12 +227,21 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di train_parameters[k]->start = BATCHES*j + (BATCHES/nb_threads)*k; train_parameters[k]->network = copy_network(network); - pthread_create( &tid[k], NULL, train_thread, (void*) train_parameters[k]); + if (train_parameters[k]->start+train_parameters[k]->nb_images >= nb_images_total) { + train_parameters[k]->nb_images = nb_images_total - train_parameters[k]->start -1; + } + if (train_parameters[k]->nb_images > 0) { + pthread_create( &tid[k], NULL, train_thread, (void*) train_parameters[k]); + } else { + tid[k] = 0; + } } for (int k=0; k < nb_threads; k++) { // On attend la terminaison de chaque thread un à un - pthread_join( tid[k], NULL ); - accuracy += train_parameters[k]->accuracy / (float) nb_images_total; + if (tid[k] != 0) { + pthread_join( tid[k], NULL ); + accuracy += train_parameters[k]->accuracy / (float) nb_images_total; + } } // On attend que tous les fils aient fini avant d'appliquer des modifications au réseau principal @@ -228,7 +251,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di free_network(train_parameters[k]->network); } current_accuracy = accuracy * nb_images_total/((j+1)*BATCHES); - printf("\rThreads [%d]\tÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: "YELLOW"%0.1f%%"RESET" ", nb_threads, i, epochs, BATCHES*(j+1), nb_images_total, current_accuracy*100); + printf("\rThreads [%d]\tÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: "YELLOW"%0.2f%%"RESET" ", nb_threads, i, epochs, BATCHES*(j+1), nb_images_total, current_accuracy*100); fflush(stdout); #else (void)nb_images_total_remaining; // Juste pour enlever un warning @@ -248,14 +271,16 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di update_weights(network, network, train_params->nb_images); update_bias(network, network, train_params->nb_images); - printf("\rÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: "YELLOW"%0.1f%%"RESET" ", i, epochs, BATCHES*(j+1), nb_images_total, current_accuracy*100); + printf("\rÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: "YELLOW"%0.4f%%"RESET" ", i, epochs, BATCHES*(j+1), nb_images_total, current_accuracy*100); fflush(stdout); #endif } + end_time = omp_get_wtime(); + elapsed_time = end_time - start_time; #ifdef USE_MULTITHREADING - printf("\rThreads [%d]\tÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: "GREEN"%0.1f%%"RESET" \n", nb_threads, i, epochs, nb_images_total, nb_images_total, accuracy*100); + printf("\rThreads [%d]\tÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: "GREEN"%0.4f%%"RESET"\tTemps: %0.2f s\n", nb_threads, i, epochs, nb_images_total, nb_images_total, accuracy*100, elapsed_time); #else - printf("\rÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: "GREEN"%0.1f%%"RESET" \n", i, epochs, nb_images_total, nb_images_total, accuracy*100); + printf("\rÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: "GREEN"%0.4f%%"RESET"\tTemps: %0.2f s\n", i, epochs, nb_images_total, nb_images_total, accuracy*100, elapsed_time); #endif write_network(out, network); } @@ -266,4 +291,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di #else free(train_params); #endif + end_time = omp_get_wtime(); + elapsed_time = end_time - algo_start; + printf("\nTemps total: %0.1f s\n", elapsed_time); } diff --git a/test/cnn_convolution.cu b/test/cnn_convolution.cu index d290adc..4e4779d 100644 --- a/test/cnn_convolution.cu +++ b/test/cnn_convolution.cu @@ -4,6 +4,7 @@ #include #include #include +#include #include "../src/cnn/include/convolution.h" #include "../src/cnn/include/struct.h" @@ -122,22 +123,23 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns) // Lancement des calculs - clock_t start, end; + double start_time, end_time; double cpu_time_used, gpu_time_used; - start = clock(); + start_time = omp_get_wtime(); make_convolution_device(kernel, input, output_gpu, output_dim); - end = clock(); + end_time = omp_get_wtime(); - gpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; + + gpu_time_used = end_time - start_time; printf("(%d, %d, %d, %d) Time used for GPU: %lf seconds\n", rows, columns, input_dim, output_dim, gpu_time_used); - start = clock(); + start_time = omp_get_wtime(); make_convolution_cpu(kernel, input, output_cpu, output_dim); - end = clock(); + end_time = omp_get_wtime(); - cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; + cpu_time_used = end_time - start_time; printf("(%d, %d, %d, %d) Time used for CPU: %lf seconds\n", rows, columns, input_dim, output_dim, cpu_time_used); // Vérification de l'égalité des matrices diff --git a/test/cnn_matrix_multiplication.cu b/test/cnn_matrix_multiplication.cu index 2b8825c..69f1ce7 100644 --- a/test/cnn_matrix_multiplication.cu +++ b/test/cnn_matrix_multiplication.cu @@ -3,6 +3,7 @@ #include #include #include +#include #include "../src/cnn/include/matrix_multiplication.h" #include "../src/include/colors.h" @@ -70,7 +71,7 @@ bool check_matrices_equality(float** m1, float** m2, int n, int p, int acceptati } void run_matrices_test(int n, int p, int q) { - clock_t start, end; + double start_time, end_time; double cpu_time_used, gpu_time_used; float** matrix1 = create_matrix(n, p); @@ -79,18 +80,18 @@ void run_matrices_test(int n, int p, int q) { float** result_cpu = create_empty_matrix(n, q); printf("(%d,%d)x(%d,%d) Data generation complete.\n", n, p, p, q); - start = clock(); + start_time = omp_get_wtime(); matrix_multiplication_device(matrix1, matrix2, result_gpu, n, p, q); - end = clock(); + end_time = omp_get_wtime(); - cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; + cpu_time_used = end_time - start_time; printf("(%d,%d)x(%d,%d) Time used for GPU: %lf seconds\n", n, p, p, q, cpu_time_used); - start = clock(); + start_time = omp_get_wtime(); matrix_multiplication_host(matrix1, matrix2, result_cpu, n, p, q); - end = clock(); + end_time = omp_get_wtime(); - gpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; + gpu_time_used = end_time - start_time; printf("(%d,%d)x(%d,%d) Time used for CPU: %lf seconds\n", n, p, p, q, gpu_time_used); // Vérification de l'égalité des matrices