From 5c712c0120b5046573e9b7cd25b2feb3a7522b18 Mon Sep 17 00:00:00 2001 From: augustin64 Date: Mon, 26 Jun 2023 16:39:25 +0200 Subject: [PATCH 1/3] Remove unnecessary includes --- src/cnn/cnn.c | 5 ----- src/cnn/main.c | 4 ---- src/cnn/test_network.c | 1 - 3 files changed, 10 deletions(-) diff --git a/src/cnn/cnn.c b/src/cnn/cnn.c index 639f232..f313253 100644 --- a/src/cnn/cnn.c +++ b/src/cnn/cnn.c @@ -9,11 +9,6 @@ #include "../common/include/colors.h" #include "../common/include/utils.h" #include "include/backpropagation.h" -#include "include/initialisation.h" -#include "include/convolution.h" -#include "include/function.h" -#include "include/creation.h" -#include "include/update.h" #include "include/make.h" #include "include/cnn.h" diff --git a/src/cnn/main.c b/src/cnn/main.c index 218b17c..aecee15 100644 --- a/src/cnn/main.c +++ b/src/cnn/main.c @@ -7,12 +7,8 @@ #include "../common/include/colors.h" -#include "include/initialisation.h" #include "include/test_network.h" -#include "include/function.h" -#include "include/creation.h" #include "include/train.h" -#include "include/cnn.h" #include "include/main.h" diff --git a/src/cnn/test_network.c b/src/cnn/test_network.c index 88c47f6..f42a28e 100644 --- a/src/cnn/test_network.c +++ b/src/cnn/test_network.c @@ -7,7 +7,6 @@ #include "../common/include/memory_management.h" #include "../common/include/mnist.h" #include "include/neuron_io.h" -#include "include/struct.h" #include "include/jpeg.h" #include "include/free.h" #include "include/cnn.h" From 6c7112b9b501b68e0634906ed3235c556fa14943 Mon Sep 17 00:00:00 2001 From: augustin64 Date: Mon, 26 Jun 2023 16:45:06 +0200 Subject: [PATCH 2/3] remove openmp dependency time may not be accurate (now using processor time instead of real time) --- Makefile | 4 +-- src/cnn/train.c | 42 +++++++++++++++---------------- src/common/colors.c | 13 ++++++---- src/common/include/colors.h | 3 ++- test/cnn_convolution.cu | 16 ++++++------ test/cnn_jpeg.c | 8 +++--- 
test/cnn_matrix_multiplication.cu | 16 ++++++------ 7 files changed, 53 insertions(+), 49 deletions(-) diff --git a/Makefile b/Makefile index 0d66068..8fb474b 100644 --- a/Makefile +++ b/Makefile @@ -27,8 +27,8 @@ TESTS_SRC_CU += $(wildcard $(TEST_SRCDIR)/*.cu) TESTS_OBJ = $(TESTS_SRC:$(TEST_SRCDIR)/%.c=$(BUILDDIR)/$(TEST_SRCDIR)-%) $(TESTS_SRC_CU:$(TEST_SRCDIR)/%.cu=$(BUILDDIR)/$(TEST_SRCDIR)-%) # Linker only flags -LD_CFLAGS = -lm -lpthread -ljpeg -fopenmp -LD_NVCCFLAGS = -ljpeg -Xcompiler -fopenmp +LD_CFLAGS = -lm -lpthread -ljpeg +LD_NVCCFLAGS = -ljpeg # Compilation flag CFLAGS = -Wall -Wextra -std=gnu99 -g -O3 diff --git a/src/cnn/train.c b/src/cnn/train.c index 5840585..36921c0 100644 --- a/src/cnn/train.c +++ b/src/cnn/train.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include "../common/include/memory_management.h" #include "../common/include/colors.h" @@ -64,7 +64,7 @@ void* train_thread(void* parameters) { float loss = 0.; #ifdef DETAILED_TRAIN_TIMINGS - double start_time; + clock_t start_time; #endif pthread_t tid; @@ -81,16 +81,16 @@ void* train_thread(void* parameters) { write_image_in_network_32(images[index[i]], height, width, network->input[0][0], param->offset); #ifdef DETAILED_TRAIN_TIMINGS - start_time = omp_get_wtime(); + start_time = clock(); #endif forward_propagation(network); #ifdef DETAILED_TRAIN_TIMINGS printf("Temps de forward: "); - printf_time(omp_get_wtime() - start_time); + printf_time(clock() - start_time); printf("\n"); - start_time = omp_get_wtime(); + start_time = clock(); #endif maxi = indice_max(network->input[network->size-1][0][0], 10); @@ -108,9 +108,9 @@ void* train_thread(void* parameters) { #ifdef DETAILED_TRAIN_TIMINGS printf("Temps de backward: "); - printf_time(omp_get_wtime() - start_time); + printf_time(clock() - start_time); printf("\n"); - start_time = omp_get_wtime(); + start_time = clock(); #endif if (maxi == labels[index[i]]) { @@ -131,16 +131,16 @@ void* train_thread(void* parameters) { 
write_256_image_in_network(param->dataset->images[index[i]], width, height, param->dataset->numComponents, network->width[0], network->input[0]); #ifdef DETAILED_TRAIN_TIMINGS - start_time = omp_get_wtime(); + start_time = clock(); #endif forward_propagation(network); #ifdef DETAILED_TRAIN_TIMINGS printf("Temps de forward: "); - printf_time(omp_get_wtime() - start_time); + printf_time(clock() - start_time); printf("\n"); - start_time = omp_get_wtime(); + start_time = clock(); #endif maxi = indice_max(network->input[network->size-1][0][0], param->dataset->numCategories); @@ -148,9 +148,9 @@ void* train_thread(void* parameters) { #ifdef DETAILED_TRAIN_TIMINGS printf("Temps de backward: "); - printf_time(omp_get_wtime() - start_time); + printf_time(clock() - start_time); printf("\n"); - start_time = omp_get_wtime(); + start_time = clock(); #endif @@ -179,7 +179,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di exit(1); } #endif - srand(time(NULL)); + srand(clock()); float loss; float batch_loss; // May be redundant with loss, but gives more informations float test_accuracy = 0.; // Used to decrease Learning rate @@ -190,12 +190,12 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di //* Différents timers pour mesurer les performance en terme de vitesse - double start_time, end_time; - double elapsed_time; + clock_t start_time, end_time; + clock_t elapsed_time; - double algo_start = omp_get_wtime(); + clock_t algo_start = clock(); - start_time = omp_get_wtime(); + start_time = clock(); //* Chargement du dataset @@ -320,7 +320,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di train_params->finetuning = finetuning; #endif - end_time = omp_get_wtime(); + end_time = clock(); elapsed_time = end_time - start_time; printf("Taux d'apprentissage initial: %0.2e\n", network->learning_rate); @@ -331,7 +331,7 @@ void train(int dataset_type, char* images_file, char* labels_file, 
char* data_di //* Boucle d'apprentissage for (int i=0; i < epochs; i++) { - start_time = omp_get_wtime(); + start_time = clock(); // La variable accuracy permet d'avoir une ESTIMATION // du taux de réussite et de l'entraînement du réseau, // mais n'est en aucun cas une valeur réelle dans le cas @@ -423,7 +423,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di #endif } //* Fin d'une époque: affichage des résultats et sauvegarde du réseau - end_time = omp_get_wtime(); + end_time = clock(); elapsed_time = end_time - start_time; #ifdef USE_MULTITHREADING printf("\rThreads [%d]\tÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: " GREEN "%0.4f%%" RESET " \tLoss: %lf\tTemps: ", nb_threads, i, epochs, nb_images_total, nb_images_total, accuracy*100, loss); @@ -483,7 +483,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di free_dataset(dataset); } - end_time = omp_get_wtime(); + end_time = clock(); elapsed_time = end_time - algo_start; printf("\nTemps total: "); printf_time(elapsed_time); diff --git a/src/common/colors.c b/src/common/colors.c index f4bcaab..0b368a5 100644 --- a/src/common/colors.c +++ b/src/common/colors.c @@ -1,5 +1,6 @@ #include #include +#include #include "include/colors.h" @@ -15,11 +16,13 @@ void printf_info(char* string) { printf(BOLDBLUE "[ INFO ]" RESET " %s", string); } -void printf_time(float time) { - int hours = time/3600; - int minutes = ((int)time %3600)/60; - int seconds = ((int)time) %60; - int milliseconds = (time - (int)time)*1000; +void printf_time(clock_t time) { + double real_time = (double) time / CLOCKS_PER_SEC; + + int hours = real_time/3600; + int minutes = ((int)real_time %3600)/60; + int seconds = ((int)real_time) %60; + int milliseconds = (real_time - (int)real_time)*1000; if (hours != 0) { printf("%dh %dmn", hours, minutes); diff --git a/src/common/include/colors.h b/src/common/include/colors.h index f0bd7e8..9ff593a 100644 --- a/src/common/include/colors.h +++ 
b/src/common/include/colors.h @@ -1,4 +1,5 @@ #include +#include #ifndef DEF_COLORS_H #define DEF_COLORS_H @@ -51,7 +52,7 @@ extern "C" /* * Affiche un timing en heures minutes secondes millisecondes en limitant la précision aux deux unités les plus significatives */ -void printf_time(float time); +void printf_time(clock_t time); #ifdef __CUDACC__ extern "C" diff --git a/test/cnn_convolution.cu b/test/cnn_convolution.cu index 5dcba85..bf2f963 100644 --- a/test/cnn_convolution.cu +++ b/test/cnn_convolution.cu @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include "../src/common/include/memory_management.h" #include "../src/cnn/include/convolution.h" @@ -136,21 +136,21 @@ void run_convolution_test(int input_width, int output_width, int rows, int colum // Lancement des calculs - double start_time, end_time; - double cpu_time_used, gpu_time_used; + clock_t start_time, end_time; + clock_t cpu_time_used, gpu_time_used; - start_time = omp_get_wtime(); + start_time = clock(); make_convolution_device(kernel, input, output_gpu, output_width, 1, 0); - end_time = omp_get_wtime(); + end_time = clock(); gpu_time_used = end_time - start_time; printf("(%d, %d, %d, %d) Time used for GPU: %lf seconds\n", rows, columns, input_width, output_width, gpu_time_used); - start_time = omp_get_wtime(); + start_time = clock(); make_convolution_cpu(kernel, input, output_cpu, output_width, 1, 0); - end_time = omp_get_wtime(); + end_time = clock(); cpu_time_used = end_time - start_time; printf("(%d, %d, %d, %d) Time used for CPU: %lf seconds\n", rows, columns, input_width, output_width, cpu_time_used); @@ -199,7 +199,7 @@ int main() { } printf(GREEN "OK\n" RESET); - srand(time(NULL)); + srand(clock()); run_convolution_test(20, 15, 30, 40); run_convolution_test(30, 25, 40, 50); diff --git a/test/cnn_jpeg.c b/test/cnn_jpeg.c index 28619c7..40965ad 100644 --- a/test/cnn_jpeg.c +++ b/test/cnn_jpeg.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include 
"../src/common/include/colors.h" @@ -21,10 +21,10 @@ int main(int argc, char* argv[]) { printf("Taille des images: %dx%d\n", dataset->width, dataset->height); // Calcul du temps de chargement des images une à une - double start_time, end_time; + clock_t start_time, end_time; int N = min(100000, dataset->numImages); - start_time = omp_get_wtime(); + start_time = clock(); printf("Chargement de %d images\n", N); for (int i=0; i < N; i++) { imgRawImage* image = loadJpegImageFile(dataset->fileNames[i]); @@ -32,7 +32,7 @@ int main(int argc, char* argv[]) { free(image); } printf("OK\n"); - end_time = omp_get_wtime(); + end_time = clock(); printf("Temps par image (calculé sur une moyenne de %d): %lf s\n", N, (end_time - start_time)/N); for (int i=0; i < (int)dataset->numImages; i++) { diff --git a/test/cnn_matrix_multiplication.cu b/test/cnn_matrix_multiplication.cu index 320cb2a..7304cac 100644 --- a/test/cnn_matrix_multiplication.cu +++ b/test/cnn_matrix_multiplication.cu @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include "../src/cnn/include/matrix_multiplication.h" #include "../src/common/include/memory_management.h" @@ -72,8 +72,8 @@ bool check_matrices_equality(float** m1, float** m2, int n, int p, int acceptati } void run_matrices_test(int n, int p, int q) { - double start_time, end_time; - double cpu_time_used, gpu_time_used; + clock_t start_time, end_time; + clock_t cpu_time_used, gpu_time_used; float** matrix1 = create_matrix(n, p); float** matrix2 = create_matrix(p, q); @@ -81,16 +81,16 @@ void run_matrices_test(int n, int p, int q) { float** result_cpu = create_empty_matrix(n, q); printf("(%d,%d)x(%d,%d) Data generation complete.\n", n, p, p, q); - start_time = omp_get_wtime(); + start_time = clock(); matrix_multiplication_device(matrix1, matrix2, result_gpu, n, p, q); - end_time = omp_get_wtime(); + end_time = clock(); cpu_time_used = end_time - start_time; printf("(%d,%d)x(%d,%d) Time used for GPU: %lf seconds\n", n, p, p, q, 
cpu_time_used); - start_time = omp_get_wtime(); + start_time = clock(); matrix_multiplication_host(matrix1, matrix2, result_cpu, n, p, q); - end_time = omp_get_wtime(); + end_time = clock(); gpu_time_used = end_time - start_time; printf("(%d,%d)x(%d,%d) Time used for CPU: %lf seconds\n", n, p, p, q, gpu_time_used); @@ -134,7 +134,7 @@ int main() { } printf(GREEN "OK\n" RESET); - srand(time(NULL)); + srand(clock()); run_matrices_test(200, 1000, 200); run_matrices_test(200, 1000, 20); run_matrices_test(20, 1000, 200); From e661a4178d51ca1f6e646baa18187f9144a38f64 Mon Sep 17 00:00:00 2001 From: julienChemillier Date: Mon, 26 Jun 2023 18:09:21 +0200 Subject: [PATCH 3/3] Fix some MacOS compatibility issues --- Makefile | 8 ++++++++ README.md | 21 ++++++++++++++------- src/cnn/train.c | 21 +++++++++++++++++++-- src/dense/main.c | 21 +++++++++++++++++++-- test/cnn_jpeg.c | 4 +++- 5 files changed, 63 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 8fb474b..c0f4c37 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,4 @@ +OS := $(shell uname) BUILDDIR := ./build SRCDIR := ./src CACHE_DIR := ./.cache @@ -41,6 +42,13 @@ NVCCFLAGS = -g # -fsanitize=address -lasan #! 
WARNING: test/cnn-neuron_io fails with this option enabled +# Specify library path of libjpeg on MacOS +ifeq ($(OS),Darwin) + LD_CFLAGS += -I/opt/homebrew/Cellar/jpeg/9e/include/ -L/opt/homebrew/Cellar/jpeg/9e/lib/ + LD_NVCCFLAGS += -L/opt/homebrew/Cellar/jpeg/9e/lib/ + + CFLAGS += -I/opt/homebrew/Cellar/jpeg/9e/include/ +endif all: dense cnn; diff --git a/README.md b/README.md index f8aa7aa..8e268e6 100644 --- a/README.md +++ b/README.md @@ -178,17 +178,24 @@ Résultats avec VGG16, pour des images de 256x256 pixels (seulement une plus pet Sur le cloud avec google Colab: bon GPU mais mauvais processeur: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1LfwSrQRaoC91yC9mx9BKHzuc7odev5r6?usp=sharing) -Les distributions suivantes ont étés essayées, il sera sans doute nécessaire de modifier le code pour l'exécuter sous Windows/ MacOS: +## Dépendances +- `cuda` : pour utiliser la carte graphique (NVIDIA seulement) +- `libjpeg-dev` : n'est pas nécessairement installé par défaut +- GNU `make` : installé par défaut sur la majorité des distributions Linux et sur MacOS +- `gcc` : installé par défaut sur la majorité des distributions Linux et sur MacOS + +### Linux +Les distributions suivantes ont été essayées, il faudra parfois installer `libjpeg` - Arch - Fedora - Manjaro -- Ubuntu +- Ubuntu: `apt install libjpeg-dev` -## Dépendances -- `cuda` : pour utiliser la carte graphique (NVIDIA seulement) -- `libjpeg-dev` : n'est pas installé par défaut sur ubuntu notamment -- GNU `make` : installé par défaut sur la majorité des distributions -- `gcc` : installé par défaut sur la majorité des distributions +### MacOS +Avec [Homebrew](https://brew.sh/): +```bash +brew install libjpeg +``` ## Compilation diff --git a/src/cnn/train.c b/src/cnn/train.c index 36921c0..119ca81 100644 --- a/src/cnn/train.c +++ b/src/cnn/train.c @@ -1,4 +1,3 @@ -#include #include #include #include @@ -7,6 +6,14 @@ #include #include +#ifdef
__linux__ + #include +#elif defined(__APPLE__) + #include +#else + #error Unknown platform +#endif + #include "../common/include/memory_management.h" #include "../common/include/colors.h" #include "../common/include/utils.h" @@ -261,7 +268,17 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di #ifdef USE_MULTITHREADING int nb_remaining_images; // Nombre d'images restantes à lancer pour une série de threads // Récupération du nombre de threads disponibles - int nb_threads = get_nprocs(); + #ifdef __linux__ + int nb_threads = get_nprocs(); + #elif defined(__APPLE__) + int nb_threads; + size_t len = sizeof(nb_threads); + + if (sysctlbyname("hw.logicalcpu", &nb_threads, &len, NULL, 0) == -1) { + perror("sysctl"); + exit(1); + } + #endif pthread_t *tid = (pthread_t*)malloc(nb_threads * sizeof(pthread_t)); // Création des paramètres donnés à chaque thread dans le cas du multi-threading diff --git a/src/dense/main.c b/src/dense/main.c index 5135d78..5c67cd6 100644 --- a/src/dense/main.c +++ b/src/dense/main.c @@ -4,7 +4,14 @@ #include #include #include -#include + +#ifdef __linux__ + #include +#elif defined(__APPLE__) + #include +#else + #error Unknown platform +#endif #include "include/neural_network.h" #include "../common/include/colors.h" @@ -201,7 +208,17 @@ void train(int epochs, char* recovery, char* image_file, char* label_file, char* float accuracy; float current_accuracy; - int nb_threads = get_nprocs(); + #ifdef __linux__ + int nb_threads = get_nprocs(); + #elif defined(__APPLE__) + int nb_threads; + size_t len = sizeof(nb_threads); + + if (sysctlbyname("hw.logicalcpu", &nb_threads, &len, NULL, 0) == -1) { + perror("sysctl"); + exit(1); + } + #endif pthread_t *tid = (pthread_t *)malloc(nb_threads * sizeof(pthread_t)); /* diff --git a/test/cnn_jpeg.c b/test/cnn_jpeg.c index 40965ad..bcc8869 100644 --- a/test/cnn_jpeg.c +++ b/test/cnn_jpeg.c @@ -33,7 +33,9 @@ int main(int argc, char* argv[]) { } printf("OK\n"); end_time = clock(); - 
printf("Temps par image (calculé sur une moyenne de %d): %lf s\n", N, (end_time - start_time)/N); + printf("Temps par image (calculé sur une moyenne de %d): ", N); + printf_time((end_time - start_time)/N); + printf("\n"); for (int i=0; i < (int)dataset->numImages; i++) { if (!dataset->fileNames[i]) {