tipe/src/mnist/main.c

367 lines
12 KiB
C
Raw Normal View History

2022-04-07 22:07:32 +02:00
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
2022-04-11 19:57:09 +02:00
#include <float.h>
2022-04-07 22:07:32 +02:00
2022-04-08 15:53:29 +02:00
#include "neural_network.c"
#include "neuron_io.c"
#include "mnist.c"
2022-04-30 09:31:52 +02:00
#define EPOCHS 100
#define BATCHES 100
2022-04-26 17:46:41 +02:00
2022-04-26 16:47:43 +02:00
/*
 * Draw an image on stdout as ASCII art, one output line per image row.
 *
 * width, height: number of columns / rows read from `image`.
 * image:         image[row][col] pixel values; each is divided by 52 to pick
 *                a glyph from the shade table below.
 * previsions:    values printed next to the first ten rows (row r shows
 *                previsions[r]), so it must hold at least min(height, 10)
 *                entries.
 */
void print_image(unsigned int width, unsigned int height, int** image, float* previsions) {
    /* Shade ramp from lightest to darkest; the trailing '\0' is the slot
     * reached by values in the 260..311 range. */
    static const char shades[] = {' ', '.', ':', '%', '#', '\0'};

    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            putchar(shades[image[row][col] / 52]);
        }
        /* Only the first ten rows carry a prediction column. */
        if (row < 10) {
            printf("\t%d : %f", row, previsions[row]);
        }
        putchar('\n');
    }
}
2022-04-08 15:53:29 +02:00
2022-04-10 21:28:54 +02:00
/*
 * Return the index of the largest value among the first n entries of tab.
 *
 * When several entries share the maximum, the lowest index wins.
 * Returns -1 when n <= 0 or when every entry is NaN.
 *
 * The running maximum is seeded from the data rather than from a constant:
 * FLT_MIN is the smallest *positive* normalised float, so seeding with it
 * made arrays containing only zeros or negative values yield -1.
 */
int indice_max(float* tab, int n) {
    int indice = -1;
    float maxi = 0.0f;

    for (int i = 0; i < n; i++) {
        /* A NaN is the only value that differs from itself; never select it. */
        if (tab[i] != tab[i]) {
            continue;
        }
        if (indice < 0 || tab[i] > maxi) {
            maxi = tab[i];
            indice = i;
        }
    }
    return indice;
}
2022-04-07 22:07:32 +02:00
/*
 * Print the command-line usage summary on stdout.
 *
 * call: program name inserted in the "Usage:" line (main passes argv[0]).
 */
void help(char* call) {
    /* Fixed part of the help text, emitted verbatim after the usage line. */
    static const char* const option_lines[] = {
        "OPTIONS:\n",
        "\ttrain:\n",
        "\t\t--batches | -b [int]\tNombre de batches.\n",
        "\t\t--couches | -c [int]\tNombres de couches.\n",
        "\t\t--neurones | -n [int]\tNombre de neurones sur la première couche.\n",
        "\t\t--recover | -r [FILENAME]\tRécupérer depuis un modèle existant.\n",
        "\t\t--images | -i [FILENAME]\tFichier contenant les images.\n",
        "\t\t--labels | -l [FILENAME]\tFichier contenant les labels.\n",
        "\t\t--out | -o [FILENAME]\tFichier où écrire le réseau de neurones.\n",
        "\trecognize:\n",
        "\t\t--modele | -m [FILENAME]\tFichier contenant le réseau de neurones.\n",
        "\t\t--in | -i [FILENAME]\tFichier contenant les images à reconnaître.\n",
        "\t\t--out | -o (text|json)\tFormat de sortie.\n",
        "\ttest:\n",
        "\t\t--images | -i [FILENAME]\tFichier contenant les images.\n",
        "\t\t--labels | -l [FILENAME]\tFichier contenant les labels.\n",
        "\t\t--modele | -m [FILENAME]\tFichier contenant le réseau de neurones.\n",
        "\t\t--preview-fails | -p\tAfficher les images ayant échoué.\n",
    };
    const size_t line_count = sizeof option_lines / sizeof option_lines[0];

    printf("Usage: %s ( train | recognize | test ) [OPTIONS]\n\n", call);
    for (size_t k = 0; k < line_count; k++) {
        fputs(option_lines[k], stdout);
    }
}
2022-04-25 14:39:45 +02:00
/*
 * Copy an image into the `z` field of the neurons of the network's first
 * layer, scaling each pixel by 1/255.
 *
 * image:   image[row][col] pixel values.
 * network: network whose layers[0] receives the pixels; that layer must
 *          hold at least height*width neurons.
 * height:  number of rows read from `image`.
 * width:   number of columns read from each row.
 *
 * Pixels are laid out row after row, so pixel (i, j) goes to neuron
 * i*width + j. (Using the row count as the stride only gives the same
 * result for square images.)
 */
void write_image_in_network(int** image, Network* network, int height, int width) {
    for (int i = 0; i < height; i++) {
        for (int j = 0; j < width; j++) {
            network->layers[0]->neurons[i*width+j]->z = (float)image[i][j] / 255.0f;
        }
    }
}
2022-04-25 14:39:45 +02:00
void train(int batches, int layers, int neurons, char* recovery, char* image_file, char* label_file, char* out) {
2022-04-08 16:07:44 +02:00
// Entraînement du réseau sur le set de données MNIST
2022-04-25 14:39:45 +02:00
Network* network;
2022-04-08 15:53:29 +02:00
2022-04-25 14:39:45 +02:00
//int* repartition = malloc(sizeof(int)*layers);
2022-05-10 21:04:48 +02:00
int nb_neurons_last = 10;
int repartition[2] = {784, nb_neurons_last};
2022-04-10 21:28:54 +02:00
2022-05-10 21:04:48 +02:00
float* output = malloc(sizeof(float)*nb_neurons_last);
2022-04-25 14:39:45 +02:00
int* desired_output;
2022-04-10 21:28:54 +02:00
float accuracy;
2022-04-26 16:47:43 +02:00
float loss;
2022-04-25 14:39:45 +02:00
//generer_repartition(layers, repartition);
2022-04-08 15:53:29 +02:00
/*
* On repart d'un réseau déjà créée stocké dans un fichier
* ou on repart de zéro si aucune backup n'est fournie
* */
if (! recovery) {
2022-04-25 14:39:45 +02:00
network = malloc(sizeof(Network));
network_creation(network, repartition, layers);
network_initialisation(network);
2022-04-08 15:53:29 +02:00
} else {
2022-04-25 14:39:45 +02:00
network = read_network(recovery);
2022-04-08 19:34:26 +02:00
printf("Backup restaurée.\n");
2022-04-08 15:53:29 +02:00
}
2022-04-25 14:39:45 +02:00
Layer* der_layer = network->layers[network->nb_layers-1];
2022-04-10 21:28:54 +02:00
2022-04-08 15:53:29 +02:00
// Chargement des images du set de données MNIST
int* parameters = read_mnist_images_parameters(image_file);
int nb_images = parameters[0];
int height = parameters[1];
int width = parameters[2];
int*** images = read_mnist_images(image_file);
unsigned int* labels = read_mnist_labels(label_file);
for (int i=0; i < batches; i++) {
2022-04-10 12:01:20 +02:00
printf("Batch [%d/%d]", i, batches);
2022-04-10 21:28:54 +02:00
accuracy = 0.;
2022-04-26 16:47:43 +02:00
loss = 0.;
2022-04-10 21:28:54 +02:00
2022-04-08 15:53:29 +02:00
for (int j=0; j < nb_images; j++) {
2022-04-10 12:01:20 +02:00
printf("\rBatch [%d/%d]\tImage [%d/%d]",i, batches, j, nb_images);
2022-04-10 21:28:54 +02:00
2022-04-25 14:39:45 +02:00
write_image_in_network(images[j], network, height, width);
desired_output = desired_output_creation(network, labels[j]);
forward_propagation(network);
backward_propagation(network, desired_output);
2022-04-10 21:28:54 +02:00
2022-05-10 21:04:48 +02:00
for (int k=0; k < nb_neurons_last; k++) {
output[k] = der_layer->neurons[k]->z;
2022-04-10 21:28:54 +02:00
}
2022-05-10 21:04:48 +02:00
if (indice_max(output, nb_neurons_last) == labels[j]) {
2022-04-10 21:28:54 +02:00
accuracy += 1. / (float)nb_images;
}
2022-04-26 16:47:43 +02:00
loss += loss_computing(network, labels[j]) / (float)nb_images;
2022-04-25 14:39:45 +02:00
free(desired_output);
2022-04-26 17:46:41 +02:00
2022-05-03 09:56:05 +02:00
if (j%BATCHES==BATCHES-1)
2022-04-26 17:46:41 +02:00
network_modification(network, BATCHES);
2022-04-08 15:53:29 +02:00
}
2022-04-26 17:46:41 +02:00
2022-05-03 09:56:05 +02:00
if (nb_images%BATCHES != 0)
2022-04-26 17:46:41 +02:00
network_modification(network, nb_images%BATCHES);
2022-04-26 16:47:43 +02:00
printf("\rBatch [%d/%d]\tImage [%d/%d]\tAccuracy: %0.1f%%\tLoss: %f\n",i, batches, nb_images, nb_images, accuracy*100, loss);
2022-04-25 14:39:45 +02:00
write_network(out, network);
2022-04-08 15:53:29 +02:00
}
2022-04-25 14:39:45 +02:00
deletion_of_network(network);
2022-04-07 22:07:32 +02:00
}
2022-05-10 21:04:48 +02:00
float** recognize(char* model, char* entree) {
Network* network = read_network(model);
Layer* last_layer = network->layers[network->nb_layers-1];
2022-04-08 16:07:44 +02:00
int* parameters = read_mnist_images_parameters(entree);
int nb_images = parameters[0];
int height = parameters[1];
int width = parameters[2];
int*** images = read_mnist_images(entree);
2022-04-14 13:02:09 +02:00
float** results = malloc(sizeof(float*)*nb_images);
for (int i=0; i < nb_images; i++) {
2022-05-10 21:04:48 +02:00
results[i] = malloc(sizeof(float)*last_layer->nb_neurons);
2022-04-14 13:02:09 +02:00
2022-04-25 14:39:45 +02:00
write_image_in_network(images[i], network, height, width);
forward_propagation(network);
2022-04-14 13:02:09 +02:00
2022-05-10 21:04:48 +02:00
for (int j=0; j < last_layer->nb_neurons; j++) {
results[i][j] = last_layer->neurons[j]->z;
2022-04-14 13:02:09 +02:00
}
}
2022-04-25 14:39:45 +02:00
deletion_of_network(network);
2022-04-14 13:02:09 +02:00
return results;
}
2022-05-10 21:04:48 +02:00
void print_recognize(char* model, char* entree, char* output) {
Network* network = read_network(model);
2022-04-25 14:39:45 +02:00
int nb_der_layer = network->layers[network->nb_layers-1]->nb_neurons;
2022-04-14 13:02:09 +02:00
2022-04-25 14:39:45 +02:00
deletion_of_network(network);
2022-04-14 13:02:09 +02:00
int* parameters = read_mnist_images_parameters(entree);
int nb_images = parameters[0];
2022-05-10 21:04:48 +02:00
float** results = recognize(model, entree);
2022-04-08 16:07:44 +02:00
2022-05-10 21:04:48 +02:00
if (! strcmp(output, "json")) {
2022-04-10 12:01:20 +02:00
printf("{\n");
}
2022-04-08 16:07:44 +02:00
for (int i=0; i < nb_images; i++) {
2022-05-10 21:04:48 +02:00
if (! strcmp(output, "text"))
2022-04-10 12:01:20 +02:00
printf("Image %d\n", i);
else
printf("\"%d\" : [", i);
2022-04-25 14:39:45 +02:00
for (int j=0; j < nb_der_layer; j++) {
2022-05-10 21:04:48 +02:00
if (! strcmp(output, "json")) {
printf("%f", results[i][j]);
2022-04-10 21:28:54 +02:00
2022-04-25 14:39:45 +02:00
if (j+1 < nb_der_layer) {
2022-04-10 12:01:20 +02:00
printf(", ");
}
} else
2022-05-10 21:04:48 +02:00
printf("Probabilité %d: %f\n", j, results[i][j]);
2022-04-10 12:01:20 +02:00
}
2022-05-10 21:04:48 +02:00
if (! strcmp(output, "json")) {
2022-04-10 12:01:20 +02:00
if (i+1 < nb_images) {
printf("],\n");
} else {
printf("]\n");
}
2022-04-08 16:07:44 +02:00
}
}
2022-05-10 21:04:48 +02:00
if (! strcmp(output, "json"))
2022-04-11 18:00:32 +02:00
printf("}\n");
2022-04-08 16:07:44 +02:00
}
2022-05-10 21:04:48 +02:00
void test(char* model, char* fichier_images, char* fichier_labels, bool preview_fails) {
Network* network = read_network(model);
2022-04-25 14:39:45 +02:00
int nb_der_layer = network->layers[network->nb_layers-1]->nb_neurons;
2022-04-14 13:02:09 +02:00
2022-04-25 14:39:45 +02:00
deletion_of_network(network);
2022-04-14 13:02:09 +02:00
int* parameters = read_mnist_images_parameters(fichier_images);
int nb_images = parameters[0];
2022-04-26 17:09:12 +02:00
int width = parameters[1];
int height = parameters[2];
2022-04-26 16:47:43 +02:00
int*** images = read_mnist_images(fichier_images);
2022-04-14 13:02:09 +02:00
2022-05-10 21:04:48 +02:00
float** results = recognize(model, fichier_images);
2022-04-14 13:02:09 +02:00
unsigned int* labels = read_mnist_labels(fichier_labels);
float accuracy;
for (int i=0; i < nb_images; i++) {
2022-05-10 21:04:48 +02:00
if (indice_max(results[i], nb_der_layer) == labels[i]) {
2022-04-26 16:47:43 +02:00
accuracy += 1. / (float)nb_images;
2022-04-26 17:29:25 +02:00
} else if (preview_fails) {
2022-05-10 21:04:48 +02:00
printf("--- Image %d, %d --- Prévision: %d ---\n", i, labels[i], indice_max(results[i], nb_der_layer));
print_image(width, height, images[i], results[i]);
2022-04-14 13:02:09 +02:00
}
}
printf("%d Images\tAccuracy: %0.1f%%\n", nb_images, accuracy*100);
}
2022-04-08 15:53:29 +02:00
2022-04-07 22:07:32 +02:00
int main(int argc, char* argv[]) {
if (argc < 2) {
printf("Pas d'action spécifiée\n");
help(argv[0]);
exit(1);
}
if (! strcmp(argv[1], "train")) {
2022-04-26 17:46:41 +02:00
int batches = EPOCHS;
2022-04-26 16:47:43 +02:00
int layers = 2;
2022-04-07 22:07:32 +02:00
int neurons = 784;
char* images = NULL;
char* labels = NULL;
2022-04-08 15:53:29 +02:00
char* recovery = NULL;
2022-04-07 22:07:32 +02:00
char* out = NULL;
2022-04-14 13:02:09 +02:00
int i = 2;
2022-04-07 22:07:32 +02:00
while (i < argc) {
// Utiliser un switch serait sans doute plus élégant
if ((! strcmp(argv[i], "--batches"))||(! strcmp(argv[i], "-b"))) {
batches = strtol(argv[i+1], NULL, 10);
i += 2;
} else
2022-04-30 09:31:52 +02:00
if ((! strcmp(argv[i], "--couches"))||(! strcmp(argv[i], "-c"))) {
2022-04-25 14:39:45 +02:00
layers = strtol(argv[i+1], NULL, 10);
2022-04-07 22:07:32 +02:00
i += 2;
2022-04-30 09:31:52 +02:00
} else if ((! strcmp(argv[i], "--neurones"))||(! strcmp(argv[i], "-n"))) {
2022-04-07 22:07:32 +02:00
neurons = strtol(argv[i+1], NULL, 10);
i += 2;
} else if ((! strcmp(argv[i], "--images"))||(! strcmp(argv[i], "-i"))) {
images = argv[i+1];
i += 2;
} else if ((! strcmp(argv[i], "--labels"))||(! strcmp(argv[i], "-l"))) {
labels = argv[i+1];
i += 2;
2022-04-08 15:53:29 +02:00
} else if ((! strcmp(argv[i], "--recover"))||(! strcmp(argv[i], "-r"))) {
recovery = argv[i+1];
i += 2;
2022-04-07 22:07:32 +02:00
} else if ((! strcmp(argv[i], "--out"))||(! strcmp(argv[i], "-o"))) {
out = argv[i+1];
i += 2;
} else {
printf("%s : Argument non reconnu\n", argv[i]);
i++;
}
}
if (! images) {
printf("Pas de fichier d'images spécifié\n");
exit(1);
}
if (! labels) {
printf("Pas de fichier de labels spécifié\n");
exit(1);
}
if (! out) {
printf("Pas de fichier de sortie spécifié, default: out.bin\n");
out = "out.bin";
}
// Entraînement en sourçant neural_network.c
2022-04-25 14:39:45 +02:00
train(batches, layers, neurons, recovery, images, labels, out);
2022-04-07 22:07:32 +02:00
exit(0);
}
if (! strcmp(argv[1], "recognize")) {
char* in = NULL;
2022-05-10 21:04:48 +02:00
char* model = NULL;
2022-04-07 22:07:32 +02:00
char* out = NULL;
2022-04-14 13:02:09 +02:00
int i = 2;
2022-04-07 22:07:32 +02:00
while(i < argc) {
if ((! strcmp(argv[i], "--in"))||(! strcmp(argv[i], "-i"))) {
in = argv[i+1];
i += 2;
} else if ((! strcmp(argv[i], "--modele"))||(! strcmp(argv[i], "-m"))) {
2022-05-10 21:04:48 +02:00
model = argv[i+1];
2022-04-07 22:07:32 +02:00
i += 2;
} else if ((! strcmp(argv[i], "--out"))||(! strcmp(argv[i], "-o"))) {
out = argv[i+1];
i += 2;
} else {
printf("%s : Argument non reconnu\n", argv[i]);
i++;
}
}
if (! in) {
printf("Pas d'entrée spécifiée\n");
exit(1);
}
2022-05-10 21:04:48 +02:00
if (! model) {
2022-04-07 22:07:32 +02:00
printf("Pas de modèle spécifié\n");
exit(1);
}
if (! out) {
out = "text";
}
2022-05-10 21:04:48 +02:00
print_recognize(model, in, out);
2022-04-07 22:07:32 +02:00
// Reconnaissance puis affichage des données sous le format spécifié
exit(0);
}
2022-04-14 13:02:09 +02:00
if (! strcmp(argv[1], "test")) {
2022-05-10 21:04:48 +02:00
char* model = NULL;
2022-04-14 13:02:09 +02:00
char* images = NULL;
char* labels = NULL;
2022-04-26 16:47:43 +02:00
bool preview_fails = false;
2022-04-14 13:02:09 +02:00
int i = 2;
while (i < argc) {
if ((! strcmp(argv[i], "--images"))||(! strcmp(argv[i], "-i"))) {
images = argv[i+1];
i += 2;
} else if ((! strcmp(argv[i], "--labels"))||(! strcmp(argv[i], "-l"))) {
labels = argv[i+1];
i += 2;
} else if ((! strcmp(argv[i], "--modele"))||(! strcmp(argv[i], "-m"))) {
2022-05-10 21:04:48 +02:00
model = argv[i+1];
2022-04-14 13:02:09 +02:00
i += 2;
2022-04-26 16:47:43 +02:00
} else if ((! strcmp(argv[i], "--preview-fails"))||(! strcmp(argv[i], "-p"))) {
preview_fails = true;
i++;
2022-04-14 13:02:09 +02:00
}
}
2022-05-10 21:04:48 +02:00
test(model, images, labels, preview_fails);
2022-04-14 13:02:09 +02:00
exit(0);
}
2022-04-07 22:07:32 +02:00
printf("Option choisie non reconnue: %s\n", argv[1]);
help(argv[0]);
return 1;
}