From d0b7781695cfac7f17ac18e544a9d3341bdb2d32 Mon Sep 17 00:00:00 2001 From: augustin64 Date: Sat, 19 Nov 2022 16:09:07 +0100 Subject: [PATCH 1/5] Add jpeg dataset implementation --- .vscode/launch.json | 126 +++++++++++++++- Makefile | 14 +- src/cnn/cnn.c | 24 +++ src/cnn/include/cnn.h | 5 + src/cnn/include/jpeg.h | 69 +++++++++ src/cnn/include/train.h | 22 +-- src/cnn/jpeg.c | 317 ++++++++++++++++++++++++++++++++++++++++ src/cnn/train.c | 51 ++++--- test/cnn_jpeg.c | 44 ++++++ 9 files changed, 633 insertions(+), 39 deletions(-) create mode 100644 src/cnn/include/jpeg.h create mode 100644 src/cnn/jpeg.c create mode 100644 test/cnn_jpeg.c diff --git a/.vscode/launch.json b/.vscode/launch.json index 5c80732..7823de4 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -55,16 +55,109 @@ "preLaunchTask": "build-mnist" }, { - "name": "cnn/main train", + "name": "cnn/main train mnist", "type": "cppdbg", "request": "launch", "program": "${workspaceFolder}/build/cnn-main", "args": [ "train", - "--dataset", "mnist", - "--images","data/mnist/train-images-idx3-ubyte", - "--labels","data/mnist/train-labels-idx1-ubyte", - "--epochs", "10" + "--dataset", + "mnist", + "--images", + "data/mnist/train-images-idx3-ubyte", + "--labels", + "data/mnist/train-labels-idx1-ubyte", + "--epochs", + "10" + ], + "stopAtEntry": true, + "cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "miDebuggerPath": "/usr/bin/gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": false + } + ], + "preLaunchTask": "build-cnn" + }, + { + "name": "cnn/main train-test mnist", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/cnn-main", + "args": [ + "train", + "--dataset", + "mnist", + "--images", + "data/mnist/t10k-images-idx3-ubyte", + "--labels", + "data/mnist/t10k-labels-idx1-ubyte", + "--epochs", + "3" + ], + "stopAtEntry": true, + 
"cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "miDebuggerPath": "/usr/bin/gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": false + } + ], + "preLaunchTask": "build-cnn" + }, + { + "name": "cnn/main train jpg", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/cnn-main", + "args": [ + "train", + "--dataset", + "jpg", + "--datadir", + "data/50States10K/train", + "--epochs", + "10" + ], + "stopAtEntry": true, + "cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "miDebuggerPath": "/usr/bin/gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": false + } + ], + "preLaunchTask": "build-cnn" + }, + { + "name": "cnn/main train-test jpg", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/cnn-main", + "args": [ + "train", + "--dataset", + "jpg", + "--datadir", + "data/50States10K/test", + "--epochs", + "10" ], "stopAtEntry": true, "cwd": "${workspaceFolder}", @@ -123,6 +216,29 @@ "ignoreFailures": false } ] + }, + { + "name": "C/C++ Runner: Debug Session", + "type": "cppdbg", + "request": "launch", + "args": [ + "" + ], + "stopAtEntry": false, + "cwd": "/home/augustin64/Documents/tipe/projet-tipe", + "environment": [], + "program": "/home/augustin64/Documents/tipe/projet-tipe/build/Debug/outDebug", + "internalConsoleOptions": "openOnSessionStart", + "MIMode": "gdb", + "miDebuggerPath": "/usr/bin/gdb", + "externalConsole": false, + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ] } ] } \ No newline at end of file diff --git a/Makefile b/Makefile index 740ca4e..6624eb4 100644 --- a/Makefile +++ b/Makefile @@ -25,12 +25,12 @@ TESTS_SRC_CU += $(wildcard 
test/*.cu) TESTS_OBJ = $(TESTS_SRC:test/%.c=$(BUILDDIR)/test-%) $(TESTS_SRC_CU:test/%.cu=$(BUILDDIR)/test-%) # Compile flags -CFLAGS = -std=c99 -lm -lpthread -NVCCFLAGS = +CFLAGS = -std=gnu99 -lm -lpthread -ljpeg +NVCCFLAGS = -ljpeg # Additional warning rules -CFLAGS += -Wall -Wextra -NVCCFLAGS += +CFLAGS += -Wall -Wextra +NVCCFLAGS += # Remove warnings about unused variables, functions, ... # -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable # Compile with debug @@ -63,12 +63,12 @@ $(BUILDDIR)/mnist_%.o: $(MNIST_SRCDIR)/%.c $(MNIST_SRCDIR)/include/%.h # # Build cnn # -cnn: $(BUILDDIR)/cnn-main; +cnn: $(BUILDDIR)/cnn-main $(BUILDDIR)/cnn-main-cuda; -$(BUILDDIR)/cnn-main: $(CNN_SRCDIR)/main.c $(BUILDDIR)/cnn_train.o $(BUILDDIR)/cnn_cnn.o $(BUILDDIR)/cnn_creation.o $(BUILDDIR)/cnn_initialisation.o $(BUILDDIR)/cnn_make.o $(BUILDDIR)/cnn_neuron_io.o $(BUILDDIR)/cnn_function.o $(BUILDDIR)/cnn_utils.o $(BUILDDIR)/cnn_update.o $(BUILDDIR)/cnn_free.o $(BUILDDIR)/cnn_convolution.o $(BUILDDIR)/cnn_backpropagation.o $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o +$(BUILDDIR)/cnn-main: $(CNN_SRCDIR)/main.c $(BUILDDIR)/cnn_train.o $(BUILDDIR)/cnn_cnn.o $(BUILDDIR)/cnn_creation.o $(BUILDDIR)/cnn_initialisation.o $(BUILDDIR)/cnn_make.o $(BUILDDIR)/cnn_neuron_io.o $(BUILDDIR)/cnn_function.o $(BUILDDIR)/cnn_utils.o $(BUILDDIR)/cnn_update.o $(BUILDDIR)/cnn_free.o $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/cnn_convolution.o $(BUILDDIR)/cnn_backpropagation.o $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o $(CC) $^ -o $@ $(CFLAGS) -$(BUILDDIR)/cnn-main-cuda: $(BUILDDIR)/cnn_main.o $(BUILDDIR)/cnn_train.o $(BUILDDIR)/cnn_cnn.o $(BUILDDIR)/cnn_creation.o $(BUILDDIR)/cnn_initialisation.o $(BUILDDIR)/cnn_make.o $(BUILDDIR)/cnn_neuron_io.o $(BUILDDIR)/cnn_function.o $(BUILDDIR)/cnn_utils.o $(BUILDDIR)/cnn_update.o $(BUILDDIR)/cnn_free.o $(BUILDDIR)/cnn_cuda_convolution.o $(BUILDDIR)/cnn_backpropagation.o $(BUILDDIR)/cuda_utils.o $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o 
+$(BUILDDIR)/cnn-main-cuda: $(BUILDDIR)/cnn_main.o $(BUILDDIR)/cnn_train.o $(BUILDDIR)/cnn_cnn.o $(BUILDDIR)/cnn_creation.o $(BUILDDIR)/cnn_initialisation.o $(BUILDDIR)/cnn_make.o $(BUILDDIR)/cnn_neuron_io.o $(BUILDDIR)/cnn_function.o $(BUILDDIR)/cnn_utils.o $(BUILDDIR)/cnn_update.o $(BUILDDIR)/cnn_free.o $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/cnn_cuda_convolution.o $(BUILDDIR)/cnn_backpropagation.o $(BUILDDIR)/cuda_utils.o $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o $(NVCC) $(NVCCFLAGS) $^ -o $@ $(BUILDDIR)/cnn_%.o: $(CNN_SRCDIR)/%.c $(CNN_SRCDIR)/include/%.h diff --git a/src/cnn/cnn.c b/src/cnn/cnn.c index 42e06b7..5782757 100644 --- a/src/cnn/cnn.c +++ b/src/cnn/cnn.c @@ -38,6 +38,30 @@ void write_image_in_network_32(int** image, int height, int width, float** input } } +void write_image_in_network_260(unsigned char* image, int height, int width, float*** input) { + int input_size = 260; + int padding = (input_size - height)/2; + + for (int i=0; i < padding; i++) { + for (int j=0; j < input_size; j++) { + for (int composante=0; composante < 3; composante++) { + input[composante][i][j] = 0.; + input[composante][input_size-1-i][j] = 0.; + input[composante][j][i] = 0.; + input[composante][j][input_size-1-i] = 0.; + } + } + } + + for (int i=0; i < width; i++) { + for (int j=0; j < height; j++) { + for (int composante=0; composante < 3; composante++) { + input[composante][i+2][j+2] = (float)image[(i*height+j)*3 + composante] / 255.0f; + } + } + } +} + void forward_propagation(Network* network) { int activation, input_depth, input_width, output_depth, output_width; int n = network->size; diff --git a/src/cnn/include/cnn.h b/src/cnn/include/cnn.h index f5b4f14..0764257 100644 --- a/src/cnn/include/cnn.h +++ b/src/cnn/include/cnn.h @@ -14,6 +14,11 @@ int will_be_drop(int dropout_prob); */ void write_image_in_network_32(int** image, int height, int width, float** input); +/* +* Écrit une image linéarisée de 256*256*3 pixels dans un tableau de taille 260*260*3 +*/ +void 
write_image_in_network_260(unsigned char* image, int height, int width, float*** input); + /* * Propage en avant le cnn */ diff --git a/src/cnn/include/jpeg.h b/src/cnn/include/jpeg.h new file mode 100644 index 0000000..af8999f --- /dev/null +++ b/src/cnn/include/jpeg.h @@ -0,0 +1,69 @@ +#ifndef JPEG_DEF_H +#define JPEG_DEF_H + +// keep images in ram vs re-read and decompress each time +// #define STORE_IMAGES_TO_RAM +// Note: in use dataset is 90Go once decompressed, use with caution + +/* +* Struct used to describe a single JPEG image +*/ +typedef struct imgRawImage { + unsigned int numComponents; // Nombre de composantes (S, R, G, B, ...) + unsigned long int width, height; // Taille de l'image + unsigned char* lpData; // Données de l'image +} imgRawImage; + +/* +* Struct used to describe a full JPEG dataset +*/ +typedef struct jpegDataset { + unsigned int numComponents; // Nombre de composantes (S, R, G, B, ...) + unsigned int numImages; // Nombre d'images (fichiers) + unsigned int numCategories; // Nombre de catégories (dossiers) + + unsigned int width; // Largeur des images + unsigned int height; // Hauteur des images + + unsigned int* labels; // Labels + unsigned char** images; // Images en cache, vaut NULL si STORE_IMAGES_TO_RAM n'est pas défini + char** fileNames; // Noms de fichiers +} jpegDataset; + +/* +* Load a single JPEG image from its location +*/ +imgRawImage* loadJpegImageFile(char* lpFilename); + +/* +* Load a complete dataset from its path +*/ +jpegDataset* loadJpegDataset(char* folderPath); + +/* +* Count the number of directories available directly under a specific path +*/ +unsigned int countDirectories(char* path); + +/* +* Counts recursively the number of files available in a directory (and all subdirs) +*/ +unsigned int countFiles(char* path); + +/* +* Adds the names of files available under a directory to a char* array +*/ +void addFilenamesToArray(char* path, char** array, int* index); + +/* +* Free a dataset +*/ +void 
free_dataset(jpegDataset* dataset); + +/* +* Returns the value of the label for a given directory +* (Generated with Python) +*/ +unsigned int getLabel(char* string); + +#endif \ No newline at end of file diff --git a/src/cnn/include/train.h b/src/cnn/include/train.h index 95356b8..2afc718 100644 --- a/src/cnn/include/train.h +++ b/src/cnn/include/train.h @@ -1,4 +1,5 @@ #include "struct.h" +#include "jpeg.h" #ifndef DEF_TRAIN_H #define DEF_TRAIN_H @@ -12,16 +13,17 @@ * Structure donnée en argument à la fonction 'train_thread' */ typedef struct TrainParameters { - Network* network; - int*** images; - unsigned int* labels; - int width; - int height; - int dataset_type; - char* data_dir; - int start; - int nb_images; - float accuracy; + Network* network; // Réseau + jpegDataset* dataset; // Dataset si de type JPEG + int* index; // Sert à réordonner les images + int*** images; // Images si de type MNIST + unsigned int* labels; // Labels si de type MNIST + int width; // Largeur des images + int height; // Hauteur des images + int dataset_type; // Type de dataset + int start; // Début des images + int nb_images; // Nombre d'images àn traiter + float accuracy; // Accuracy (à renvoyer) } TrainParameters; diff --git a/src/cnn/jpeg.c b/src/cnn/jpeg.c new file mode 100644 index 0000000..fb522a2 --- /dev/null +++ b/src/cnn/jpeg.c @@ -0,0 +1,317 @@ +#include +#include +#include +#include +#include +#include + +#include "include/jpeg.h" +#include "../include/colors.h" + +// How to load a JPEG using libjpeg: https://www.tspi.at/2020/03/20/libjpegexample.html +imgRawImage* loadJpegImageFile(char* lpFilename) { + struct jpeg_decompress_struct info; + struct jpeg_error_mgr err; + + imgRawImage* lpNewImage; + + unsigned long int imgWidth, imgHeight; + int numComponents; + + unsigned long int dwBufferBytes; + unsigned char* lpData; + + unsigned char* lpRowBuffer[1]; + + FILE* fHandle; + + fHandle = fopen(lpFilename, "rb"); + if(fHandle == NULL) { + fprintf(stderr, "%s:%u: Failed to 
read file %s\n", __FILE__, __LINE__, lpFilename); + return NULL; /* ToDo */ + } + + info.err = jpeg_std_error(&err); + jpeg_create_decompress(&info); + + jpeg_stdio_src(&info, fHandle); + jpeg_read_header(&info, TRUE); + + jpeg_start_decompress(&info); + imgWidth = info.output_width; + imgHeight = info.output_height; + numComponents = info.num_components; + + #ifdef DEBUG + fprintf( + stderr, + "%s:%u: Reading JPEG with dimensions %lu x %lu and %u components\n", + __FILE__, __LINE__, + imgWidth, imgHeight, numComponents + ); + #endif + + dwBufferBytes = imgWidth * imgHeight * 3; /* We only read RGB, not A */ + lpData = (unsigned char*)malloc(sizeof(unsigned char)*dwBufferBytes); + + lpNewImage = (imgRawImage*)malloc(sizeof(imgRawImage)); + lpNewImage->numComponents = numComponents; + lpNewImage->width = imgWidth; + lpNewImage->height = imgHeight; + lpNewImage->lpData = lpData; + + /* Read scanline by scanline */ + while(info.output_scanline < info.output_height) { + lpRowBuffer[0] = (unsigned char *)(&lpData[3*info.output_width*info.output_scanline]); + jpeg_read_scanlines(&info, lpRowBuffer, 1); + } + + jpeg_finish_decompress(&info); + jpeg_destroy_decompress(&info); + fclose(fHandle); + + return lpNewImage; +} + +jpegDataset* loadJpegDataset(char* folderPath) { + jpegDataset* dataset = (jpegDataset*)malloc(sizeof(jpegDataset)); + imgRawImage* image; + + // We start by counting the number of images and categories + dataset->numCategories = countDirectories(folderPath); + dataset->numImages = countFiles(folderPath); + + dataset->images = NULL; + dataset->labels = (unsigned int*)malloc(sizeof(unsigned int)*dataset->numImages); + dataset->fileNames = (char**)malloc(sizeof(char*)*dataset->numImages); + + DIR* dirp; + struct dirent* entry; + char* concatenated_path; + int index = 0; + int prev_index = index; + + dirp = opendir(folderPath); + while ((entry = readdir(dirp)) != NULL) { + if (strcmp(entry->d_name, ".")&&strcmp(entry->d_name, "..")) { + if (entry->d_type == 
DT_DIR) { + prev_index = index; + concatenated_path = malloc(strlen(folderPath)+strlen(entry->d_name)+2); + sprintf(concatenated_path, "%s/%s", folderPath, entry->d_name); + addFilenamesToArray(concatenated_path, dataset->fileNames, &index); + for (int i=prev_index; i < index; i++) { + dataset->labels[i] = getLabel(entry->d_name); + } + free(concatenated_path); + } + } + } + dataset->images = (unsigned char**)malloc(sizeof(unsigned char*)*dataset->numImages); + for (int i=0; i < (int)dataset->numImages; i++) { + dataset->images[i] = NULL; + #ifdef STORE_IMAGES_TO_RAM + if (i%1000 == 0) { + printf("[%d/%d] Chargement des images\r\n", i, dataset->numImages); + fflush(stdout); + } + image = loadJpegImageFile(dataset->fileNames[i]); + dataset->images[i] = image->lpData; + free(image); + #endif + } + #ifdef STORE_IMAGES_TO_RAM + printf("Chargement des images terminé \n"); + #endif + + // Lecture des caractéristiques des images + image = loadJpegImageFile(dataset->fileNames[0]); + dataset->width = image->width; + dataset->height = image->height; + dataset->numComponents = image->numComponents; + + free(image->lpData); + free(image); + + closedir(dirp); + return dataset; +} + +unsigned int countDirectories(char* path) { + unsigned int directories = 0; + DIR* dirp; + struct dirent* entry; + + dirp = opendir(path); + while ((entry = readdir(dirp)) != NULL) { + if (entry->d_type == DT_DIR && strcmp(entry->d_name, ".") && strcmp(entry->d_name, "..")) { + directories++; + } + } + closedir(dirp); + return directories; +} + +unsigned int countFiles(char* path) { + unsigned int files = 0; + DIR* dirp; + char* next_dir; + struct dirent* entry; + + dirp = opendir(path); + while ((entry = readdir(dirp)) != NULL) { + if (strcmp(entry->d_name, ".")&&strcmp(entry->d_name, "..")) { + if (entry->d_type == DT_REG) { + files++; + } else if (entry->d_type == DT_DIR) { + next_dir = (char*)malloc(strlen(path)+strlen(entry->d_name)+2); + sprintf(next_dir, "%s/%s", path, entry->d_name); + files 
+= countFiles(next_dir); + free(next_dir); + } + } + } + closedir(dirp); + return files; +} + +void addFilenamesToArray(char* path, char** array, int* index) { + int i = *index; + + DIR* dirp; + struct dirent* entry; + char* filename; + + dirp = opendir(path); /* There should be error handling after this */ + while ((entry = readdir(dirp)) != NULL) { + if (entry->d_type == DT_REG) { /* If the entry is a regular file */ + filename = (char*)malloc(strlen(path)+strlen(entry->d_name)+2); + sprintf(filename, "%s/%s", path, entry->d_name); + array[i] = filename; + i++; + } + } + *index = i; + closedir(dirp); +} + +void free_dataset(jpegDataset* dataset) { + for (int i=0; i < (int)dataset->numImages; i++) { + free(dataset->fileNames[i]); + #ifdef STORE_IMAGES_TO_RAM + free(dataset->images[i]); + #endif + } + free(dataset->fileNames); + free(dataset->labels); + #ifdef STORE_IMAGES_TO_RAM + free(dataset->images); + #endif + free(dataset); +} + +unsigned int getLabel(char* string) { + if (!strcmp(string, "Alabama")) { + return 0; + } if (!strcmp(string, "Alaska")) { + return 1; + } if (!strcmp(string, "Arizona")) { + return 2; + } if (!strcmp(string, "Arkansas")) { + return 3; + } if (!strcmp(string, "California")) { + return 4; + } if (!strcmp(string, "Colorado")) { + return 5; + } if (!strcmp(string, "Connecticut")) { + return 6; + } if (!strcmp(string, "Delaware")) { + return 7; + } if (!strcmp(string, "Florida")) { + return 8; + } if (!strcmp(string, "Georgia")) { + return 9; + } if (!strcmp(string, "Hawaii")) { + return 10; + } if (!strcmp(string, "Idaho")) { + return 11; + } if (!strcmp(string, "Illinois")) { + return 12; + } if (!strcmp(string, "Indiana")) { + return 13; + } if (!strcmp(string, "Iowa")) { + return 14; + } if (!strcmp(string, "Kansas")) { + return 15; + } if (!strcmp(string, "Kentucky")) { + return 16; + } if (!strcmp(string, "Louisiana")) { + return 17; + } if (!strcmp(string, "Maine")) { + return 18; + } if (!strcmp(string, "Maryland")) { + return 
19; + } if (!strcmp(string, "Massachusetts")) { + return 20; + } if (!strcmp(string, "Michigan")) { + return 21; + } if (!strcmp(string, "Minnesota")) { + return 22; + } if (!strcmp(string, "Mississippi")) { + return 23; + } if (!strcmp(string, "Missouri")) { + return 24; + } if (!strcmp(string, "Montana")) { + return 25; + } if (!strcmp(string, "Nebraska")) { + return 26; + } if (!strcmp(string, "Nevada")) { + return 27; + } if (!strcmp(string, "New Hampshire")) { + return 28; + } if (!strcmp(string, "New Jersey")) { + return 29; + } if (!strcmp(string, "New Mexico")) { + return 30; + } if (!strcmp(string, "New York")) { + return 31; + } if (!strcmp(string, "North Carolina")) { + return 32; + } if (!strcmp(string, "North Dakota")) { + return 33; + } if (!strcmp(string, "Ohio")) { + return 34; + } if (!strcmp(string, "Oklahoma")) { + return 35; + } if (!strcmp(string, "Oregon")) { + return 36; + } if (!strcmp(string, "Pennsylvania")) { + return 37; + } if (!strcmp(string, "Rhode Island")) { + return 38; + } if (!strcmp(string, "South Carolina")) { + return 39; + } if (!strcmp(string, "South Dakota")) { + return 40; + } if (!strcmp(string, "Tennessee")) { + return 41; + } if (!strcmp(string, "Texas")) { + return 42; + } if (!strcmp(string, "Utah")) { + return 43; + } if (!strcmp(string, "Vermont")) { + return 44; + } if (!strcmp(string, "Virginia")) { + return 45; + } if (!strcmp(string, "Washington")) { + return 46; + } if (!strcmp(string, "West Virginia")) { + return 47; + } if (!strcmp(string, "Wisconsin")) { + return 48; + } if (!strcmp(string, "Wyoming")) { + return 49; + } + printf_warning("Catégorie non reconnue "); + printf("%s\n", string); + return -1; // Dossier non reconnu +} \ No newline at end of file diff --git a/src/cnn/train.c b/src/cnn/train.c index 7731875..988ed3c 100644 --- a/src/cnn/train.c +++ b/src/cnn/train.c @@ -14,6 +14,7 @@ #include "include/update.h" #include "include/utils.h" #include "include/free.h" +#include "include/jpeg.h" #include 
"include/cnn.h" #include "include/train.h" @@ -36,6 +37,7 @@ int indice_max(float* tab, int n) { void* train_thread(void* parameters) { TrainParameters* param = (TrainParameters*)parameters; Network* network = param->network; + imgRawImage* image; int maxi; int*** images = param->images; @@ -58,8 +60,22 @@ void* train_thread(void* parameters) { accuracy += 1.; } } else { - printf_error("Dataset de type JPG non implémenté\n"); - exit(1); + if (!param->dataset->images[i]) { + image = loadJpegImageFile(param->dataset->fileNames[i]); + param->dataset->images[i] = image->lpData; + free(image); + } + write_image_in_network_260(param->dataset->images[i], height, width, network->input[0]); + forward_propagation(network); + maxi = indice_max(network->input[network->size-1][0][0], 10); + backward_propagation(network, param->dataset->labels[i]); + + if (maxi == (int)param->dataset->labels[i]) { + accuracy += 1.; + } + + free(param->dataset->images[i]); + param->dataset->images[i] = NULL; } } @@ -79,6 +95,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di int*** images; unsigned int* labels; + jpegDataset* dataset; if (dataset_type == 0) { // Type MNIST // Chargement des images du set de données MNIST @@ -91,13 +108,12 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di input_dim = 32; input_depth = 1; - } else { // TODO Type JPG - input_dim = 256; - input_depth = 3; + } else { // Type JPG + dataset = loadJpegDataset(data_dir); + input_dim = dataset->height + 4; // image_size + padding + input_depth = dataset->numComponents; - nb_images_total = 0; - printf_error("Dataset de type jpg non-implémenté.\n"); - exit(1); + nb_images_total = dataset->numImages; } // Initialisation du réseau @@ -120,11 +136,13 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di if (dataset_type == 0) { param->images = images; param->labels = labels; - param->data_dir = NULL; + param->dataset = NULL; 
param->width = 28; param->height = 28; } else { - param->data_dir = data_dir; + param->dataset = dataset; + param->width = dataset->width; + param->height = dataset->height; param->images = NULL; param->labels = NULL; } @@ -133,7 +151,6 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di #else // Création des paramètres donnés à l'unique // thread dans l'hypothèse ou le multi-threading n'est pas utilisé. - // Cela est utile à des fins de débogage notamment, // où l'utilisation de threads rend vite les choses plus compliquées qu'elles ne le sont. TrainParameters* train_params = (TrainParameters*)malloc(sizeof(TrainParameters)); @@ -145,12 +162,12 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di train_params->labels = labels; train_params->width = 28; train_params->height = 28; - train_params->data_dir = NULL; + train_params->dataset = NULL; } else { - train_params->data_dir = data_dir; + train_params->dataset = dataset; + train_params->width = dataset->width; + train_params->height = dataset->height; train_params->images = NULL; - train_params->width = 0; - train_params->height = 0; train_params->labels = NULL; } train_params->nb_images = BATCHES; @@ -185,7 +202,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di // TODO patch_network(network, train_parameters[k]->network, train_parameters[k]->nb_images); free_network(train_parameters[k]->network); } - current_accuracy = accuracy * nb_images_total/(j*BATCHES); + current_accuracy = accuracy * nb_images_total/((j+1)*BATCHES); printf("\rThreads [%d]\tÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: "YELLOW"%0.1f%%"RESET" ", nb_threads, i, epochs, BATCHES*(j+1), nb_images_total, current_accuracy*100); #else train_params->start = j*BATCHES; @@ -193,7 +210,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di train_thread((void*)train_params); accuracy += train_params->accuracy / (float) 
nb_images_total; - current_accuracy = accuracy * nb_images_total/(j*BATCHES); + current_accuracy = accuracy * nb_images_total/((j+1)*BATCHES); update_weights(network, network); update_bias(network, network); diff --git a/test/cnn_jpeg.c b/test/cnn_jpeg.c new file mode 100644 index 0000000..7acd776 --- /dev/null +++ b/test/cnn_jpeg.c @@ -0,0 +1,44 @@ +#include +#include + +#include "../src/cnn/include/jpeg.h" +#include "../src/include/colors.h" + +int main(int argc, char* argv[]) { + if (argc != 2) { + printf("Pas de dataset en argument, test avorté\n"); + // On n'arrête pas le processus avce un code de sortie + // pour pouvoir utiliser `make run-tests` dans des scripts + // sans avoir à spécifier d'arguments supplémentaires + return 0; + } + jpegDataset* dataset = loadJpegDataset(argv[1]); + printf("Nombre de catégories: %d\n", dataset->numCategories); + printf("Nombre d'images: %d\n", dataset->numImages); + printf("Taille des images: %dx%d\n", dataset->width, dataset->height); + #ifdef STORE_IMAGES_TO_RAM + if (!dataset->images) { + printf_error("Aucune image n'a été chargée\n"); + return 1; + } + #endif + for (int i=0; i < (int)dataset->numImages; i++) { + if (!dataset->fileNames[i]) { + printf_error("Nom de fichier non chargé à l'index "); + printf("%d\n", i); + return 1; + } + #ifdef STORE_IMAGES_TO_RAM + if (!dataset->images[i]) { + printf_error("Image non chargée à l'index "); + printf("%d\n", i); + printf_error("Nom du fichier: "); + printf("%s\n", dataset->fileNames[i]); + return 1; + } + #endif + } + + free_dataset(dataset); + return 0; +} \ No newline at end of file From 715f550a495d8df2509b220fdeae38918efb951c Mon Sep 17 00:00:00 2001 From: augustin64 Date: Sat, 19 Nov 2022 16:11:24 +0100 Subject: [PATCH 2/5] Update Makefile --- Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 6624eb4..288a7d8 100644 --- a/Makefile +++ b/Makefile @@ -29,8 +29,8 @@ CFLAGS = -std=gnu99 -lm -lpthread -ljpeg 
NVCCFLAGS = -ljpeg # Additional warning rules -CFLAGS += -Wall -Wextra -NVCCFLAGS += +CFLAGS += -Wall -Wextra +NVCCFLAGS += # Remove warnings about unused variables, functions, ... # -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable # Compile with debug @@ -69,7 +69,11 @@ $(BUILDDIR)/cnn-main: $(CNN_SRCDIR)/main.c $(BUILDDIR)/cnn_train.o $(BUILDDIR)/c $(CC) $^ -o $@ $(CFLAGS) $(BUILDDIR)/cnn-main-cuda: $(BUILDDIR)/cnn_main.o $(BUILDDIR)/cnn_train.o $(BUILDDIR)/cnn_cnn.o $(BUILDDIR)/cnn_creation.o $(BUILDDIR)/cnn_initialisation.o $(BUILDDIR)/cnn_make.o $(BUILDDIR)/cnn_neuron_io.o $(BUILDDIR)/cnn_function.o $(BUILDDIR)/cnn_utils.o $(BUILDDIR)/cnn_update.o $(BUILDDIR)/cnn_free.o $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/cnn_cuda_convolution.o $(BUILDDIR)/cnn_backpropagation.o $(BUILDDIR)/cuda_utils.o $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o +ifndef NVCC_INSTALLED + @echo "$(NVCC) not found, skipping" +else $(NVCC) $(NVCCFLAGS) $^ -o $@ +endif $(BUILDDIR)/cnn_%.o: $(CNN_SRCDIR)/%.c $(CNN_SRCDIR)/include/%.h $(CC) -c $< -o $@ $(CFLAGS) From cfb3cb28d737a18efd0a9d996dd025b895c00086 Mon Sep 17 00:00:00 2001 From: augustin64 Date: Sat, 19 Nov 2022 17:39:56 +0100 Subject: [PATCH 3/5] Add cnn/preview.c --- Makefile | 5 ++++- src/cnn/preview.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 src/cnn/preview.c diff --git a/Makefile b/Makefile index 288a7d8..a22b6c5 100644 --- a/Makefile +++ b/Makefile @@ -63,7 +63,7 @@ $(BUILDDIR)/mnist_%.o: $(MNIST_SRCDIR)/%.c $(MNIST_SRCDIR)/include/%.h # # Build cnn # -cnn: $(BUILDDIR)/cnn-main $(BUILDDIR)/cnn-main-cuda; +cnn: $(BUILDDIR)/cnn-main $(BUILDDIR)/cnn-main-cuda $(BUILDDIR)/cnn-preview; $(BUILDDIR)/cnn-main: $(CNN_SRCDIR)/main.c $(BUILDDIR)/cnn_train.o $(BUILDDIR)/cnn_cnn.o $(BUILDDIR)/cnn_creation.o $(BUILDDIR)/cnn_initialisation.o $(BUILDDIR)/cnn_make.o $(BUILDDIR)/cnn_neuron_io.o $(BUILDDIR)/cnn_function.o 
$(BUILDDIR)/cnn_utils.o $(BUILDDIR)/cnn_update.o $(BUILDDIR)/cnn_free.o $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/cnn_convolution.o $(BUILDDIR)/cnn_backpropagation.o $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o $(CC) $^ -o $@ $(CFLAGS) @@ -75,6 +75,9 @@ else $(NVCC) $(NVCCFLAGS) $^ -o $@ endif +$(BUILDDIR)/cnn-preview: $(CNN_SRCDIR)/preview.c $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/colors.o + $(CC) $^ -o $@ $(CFLAGS) + $(BUILDDIR)/cnn_%.o: $(CNN_SRCDIR)/%.c $(CNN_SRCDIR)/include/%.h $(CC) -c $< -o $@ $(CFLAGS) diff --git a/src/cnn/preview.c b/src/cnn/preview.c new file mode 100644 index 0000000..5e0da6a --- /dev/null +++ b/src/cnn/preview.c @@ -0,0 +1,51 @@ +#include +#include +#include +#include + +#include "include/jpeg.h" + + +void print_image(unsigned char* image, int height, int width) { + + for (int i=0; i < (int)width; i++) { + for (int j=0; j < (int)height; j++) { + printf("\x1b[38;2;%d;%d;%dm#\x1b[0m", image[((i*width)+j)*3 + 0], image[((i*width)+j)*3 + 1], image[((i*width)+j)*3 + 2]); + } + printf("\n"); + } +} + +void preview_images(char* path, int limit) { + jpegDataset* dataset = loadJpegDataset(path); + imgRawImage* image; + + if (limit == -1) { + limit = dataset->numImages; + } + for (int i=0; i < limit; i++) { + printf("--- Image %d : %d ---\n", i, dataset->labels[i]); + + if (!dataset->images[i]) { + image = loadJpegImageFile(dataset->fileNames[i]); + dataset->images[i] = image->lpData; + free(image); + } + print_image(dataset->images[i], dataset->height, dataset->width); + + free(dataset->images[i]); + } +} + +int main(int argc, char *argv[]) { + if (argc < 2) { + printf("Utilisation: %s [DIRECTORY] (opt:nombre d'images)\n", argv[0]); + return 1; + } + int limit = -1; + if (argc > 2) { + limit = strtol(argv[2], NULL, 10); + } + preview_images(argv[1], limit); + return 0; +} From 66e302d6c1344524cb19d015139d93762180dbc8 Mon Sep 17 00:00:00 2001 From: augustin64 Date: Sat, 19 Nov 2022 17:46:17 +0100 Subject: [PATCH 4/5] Update Makefile --- Makefile | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a22b6c5..2e01f9f 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ CNN_SRC := $(wildcard $(CNN_SRCDIR)/*.c) CNN_SRC_CUDA := $(wildcard $(CNN_SRCDIR)/*.cu) MNIST_OBJ = $(filter-out $(BUILDDIR)/mnist_main.o $(BUILDDIR)/mnist_utils.o $(BUILDDIR)/mnist_preview.o, $(MNIST_SRC:$(MNIST_SRCDIR)/%.c=$(BUILDDIR)/mnist_%.o)) -CNN_OBJ = $(filter-out $(BUILDDIR)/cnn_main.o, $(CNN_SRC:$(CNN_SRCDIR)/%.c=$(BUILDDIR)/cnn_%.o)) +CNN_OBJ = $(filter-out $(BUILDDIR)/cnn_main.o $(BUILDDIR)/cnn_preview.o, $(CNN_SRC:$(CNN_SRCDIR)/%.c=$(BUILDDIR)/cnn_%.o)) CNN_OBJ_CUDA = $(CNN_SRC:$(CNN_SRCDIR)/%.cu=$(BUILDDIR)/cnn_%.o) From 18900f3fe9a575b831ace6648dc3e02440b1cee8 Mon Sep 17 00:00:00 2001 From: augustin64 Date: Sat, 19 Nov 2022 22:22:24 +0100 Subject: [PATCH 5/5] Change images aspect ratio --- src/cnn/preview.c | 15 +++++++++++---- src/cnn/train.c | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/cnn/preview.c b/src/cnn/preview.c index 5e0da6a..9875dcd 100644 --- a/src/cnn/preview.c +++ b/src/cnn/preview.c @@ -7,12 +7,19 @@ void print_image(unsigned char* image, int height, int width) { + int red, green, blue; + for (int i=0; i < (int)height/2; i++) { + for (int j=0; j < (int)width; j++) { + red = (image[((2*i*width)+j)*3 + 0] + image[(((2*i+1)*width)+j)*3 + 0])/2; + green = (image[((2*i*width)+j)*3 + 1] + image[(((2*i+1)*width)+j)*3 + 1])/2;; + blue = (image[((2*i*width)+j)*3 + 2] + image[(((2*i+1)*width)+j)*3 + 2])/2;; - for (int i=0; i < (int)width; i++) { - for (int j=0; j < (int)height; j++) { - printf("\x1b[38;2;%d;%d;%dm#\x1b[0m", image[((i*width)+j)*3 + 0], image[((i*width)+j)*3 + 1], image[((i*width)+j)*3 + 2]); + // Make the text color opposed to background color + printf("\x1b[38;2;%d;%d;%dm", 255-red, 255-green, 255-blue); + + printf("\x1b[48;2;%d;%d;%dm ", red, green, blue); } - printf("\n"); + printf("\x1b[0m\n"); } } diff --git a/src/cnn/train.c b/src/cnn/train.c
index 988ed3c..bc4dfec 100644 --- a/src/cnn/train.c +++ b/src/cnn/train.c @@ -67,7 +67,7 @@ void* train_thread(void* parameters) { } write_image_in_network_260(param->dataset->images[i], height, width, network->input[0]); forward_propagation(network); - maxi = indice_max(network->input[network->size-1][0][0], 10); + maxi = indice_max(network->input[network->size-1][0][0], param->dataset->numCategories); backward_propagation(network, param->dataset->labels[i]); if (maxi == (int)param->dataset->labels[i]) {