diff --git a/doc/resources.md b/doc/resources.md
index f1282a2..09c2a1c 100644
--- a/doc/resources.md
+++ b/doc/resources.md
@@ -2,17 +2,17 @@
 ## Simple Neural Network
 
 - [3Blue1Brown](https://www.3blue1brown.com/topics/neural-networks)
-
-- [Neptune.ai](https://neptune.ai/blog/backpropagation-algorithm-in-neural-networks-guide)
-
+- [Medium](https://medium.com/@14prakash/back-propagation-is-very-simple-who-made-it-complicated-97b794c97e5c)
 - [Simeon Kostadinov: Understanding Backpropagation](https://towardsdatascience.com/understanding-backpropagation-algorithm-7bb3aa2f95fd)
-
 - [Tobias Hill: Gradient Descent](https://towardsdatascience.com/part-2-gradient-descent-and-backpropagation-bf90932c066a)
 
 ## Convolutional Neural Network
 
+- [The Independent Code](https://www.youtube.com/watch?v=Lakz2MoHy6o)
+
 ## Datasets
 
 - [MNIST](http://yann.lecun.com/exdb/mnist/)
+- [ImageNet](https://www.image-net.org/index.php)
 
 ## CUDA
 - [Introduction to CUDA](https://developer.nvidia.com/blog/even-easier-introduction-cuda/) (Nvidia documentation)
diff --git a/src/cnn/cnn.c b/src/cnn/cnn.c
index 03daa07..7581005 100644
--- a/src/cnn/cnn.c
+++ b/src/cnn/cnn.c
@@ -6,6 +6,7 @@
 #include "include/initialisation.h"
 #include "include/function.h"
 #include "include/creation.h"
+#include "include/update.h"
 #include "include/make.h"
 
 #include "../include/colors.h"
@@ -130,85 +131,6 @@ void copy_input_to_input_z(float*** output, float*** output_a, int output_depth,
     }
 }
 
-void update_weights(Network* network) {
-    int n = network->size;
-    int input_depth, input_width, output_depth, output_width;
-    Kernel* k_i;
-    Kernel* k_i_1;
-    for (int i=0; i<(n-1); i++) {
-        k_i = network->kernel[i];
-        k_i_1 = network->kernel[i+1];
-        input_depth = network->depth[i];
-        input_width = network->width[i];
-        output_depth = network->depth[i+1];
-        output_width = network->width[i+1];
-
-        if (k_i->cnn) { // Convolution
-            Kernel_cnn* cnn = k_i_1->cnn;
-            int k_size = cnn->k_size;
-            for (int a=0; a<input_depth; a++) {
-                for (int b=0; b<output_depth; b++) {
-                    for (int c=0; c<k_size; c++) {
-                        for (int d=0; d<k_size; d++) {
-                            cnn->w[a][b][c][d] += cnn->d_w[a][b][c][d];
-                        }
-                    }
-                }
-            }
-        } else if (k_i->nn) { // Fully connected
-            if (input_depth==1) { // Vector -> Vector
-                Kernel_nn* nn = k_i_1->nn;
-                for (int a=0; a<input_width; a++) {
-                    for (int b=0; b<output_width; b++) {
-                        nn->weights[a][b] += nn->d_weights[a][b];
-                    }
-                }
-            } else { // Matrix -> vector
-                Kernel_nn* nn = k_i_1->nn;
-                int input_size = input_width*input_width*input_depth;
-                for (int a=0; a<input_size; a++) {
-                    for (int b=0; b<output_width; b++) {
-                        nn->weights[a][b] += nn->d_weights[a][b];
-                    }
-                }
-            }
-        } else { // Pooling
-            (void)0; // Nothing to do for a pooling layer
-        }
-    }
-}
-
-void update_bias(Network* network) {
-    int n = network->size;
-    int output_width, output_depth;
-    Kernel* k_i;
-    Kernel* k_i_1;
-    for (int i=0; i<(n-1); i++) {
-        k_i = network->kernel[i];
-        k_i_1 = network->kernel[i+1];
-        output_width = network->width[i+1];
-        output_depth = network->depth[i+1];
-
-        if (k_i->cnn) { // Convolution
-            Kernel_cnn* cnn = k_i_1->cnn;
-            for (int a=0; a<output_depth; a++) {
-                for (int b=0; b<output_width; b++) {
-                    for (int c=0; c<output_width; c++) {
-                        cnn->bias[a][b][c] += cnn->d_bias[a][b][c];
-                    }
-                }
-            }
-        } else if (k_i->nn) { // Fully connected
-            Kernel_nn* nn = k_i_1->nn;
-            for (int a=0; a<output_width; a++) {
-                nn->bias[a] += nn->d_bias[a];
-            }
-        } else { // Pooling
-            (void)0; // Nothing to do for a pooling layer
-        }
-    }
-}
-
 float compute_mean_squared_error(float* output, float* wanted_output, int len) {
     if (len==0) {
         printf("MSE error: output length is 0 -> division by 0 impossible\n");
diff --git a/src/cnn/creation.c b/src/cnn/creation.c
index 7a004ef..582a224 100644
--- a/src/cnn/creation.c
+++ b/src/cnn/creation.c
@@ -29,7 +29,9 @@ Network* create_network(int max_size, int learning_rate, int dropout, int initia
     network->kernel[0]->nn = NULL;
     network->kernel[0]->cnn = NULL;
     create_a_cube_input_layer(network, 0, input_depth, input_dim);
-    create_a_cube_input_z_layer(network, 0, input_depth, input_dim);
+    // create_a_cube_input_z_layer(network, 0, input_depth, input_dim);
+    // The input's z-layer should never be read, so we can save the space:
+    network->input_z[0] = NULL; // As we don't backpropagate the input
     return network;
 }
 
@@ -104,7 +106,7 @@ void add_2d_average_pooling(Network* network, int dim_output) {
     network->kernel[k_pos]->nn = NULL;
     network->kernel[k_pos]->activation = 100*kernel_size; // Contains no activation function
     create_a_cube_input_layer(network, n, network->depth[n-1], network->width[n-1]/2);
-    create_a_cube_input_z_layer(network, n, network->depth[n-1], network->width[n-1]/2);
+    create_a_cube_input_z_layer(network, n, network->depth[n-1], network->width[n-1]/2); // Will this be used?
     network->size++;
 }
 
@@ -130,33 +132,26 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act
     cnn->columns = depth_output;
     cnn->w = (float****)malloc(sizeof(float***)*depth_input);
     cnn->d_w = (float****)malloc(sizeof(float***)*depth_input);
-    cnn->last_d_w = (float****)malloc(sizeof(float***)*depth_input);
     for (int i=0; i < depth_input; i++) {
         cnn->w[i] = (float***)malloc(sizeof(float**)*depth_output);
         cnn->d_w[i] = (float***)malloc(sizeof(float**)*depth_output);
-        cnn->last_d_w[i] = (float***)malloc(sizeof(float**)*depth_output);
         for (int j=0; j < depth_output; j++) {
             cnn->w[i][j] = (float**)malloc(sizeof(float*)*kernel_size);
             cnn->d_w[i][j] = (float**)malloc(sizeof(float*)*kernel_size);
-            cnn->last_d_w[i][j] = (float**)malloc(sizeof(float*)*kernel_size);
             for (int k=0; k < kernel_size; k++) {
                 cnn->w[i][j][k] = (float*)malloc(sizeof(float)*kernel_size);
                 cnn->d_w[i][j][k] = (float*)malloc(sizeof(float)*kernel_size);
-                cnn->last_d_w[i][j][k] = (float*)malloc(sizeof(float)*kernel_size);
             }
         }
     }
     cnn->bias = (float***)malloc(sizeof(float**)*depth_output);
     cnn->d_bias = (float***)malloc(sizeof(float**)*depth_output);
-    cnn->last_d_bias = (float***)malloc(sizeof(float**)*depth_output);
     for (int i=0; i < depth_output; i++) {
         cnn->bias[i] = (float**)malloc(sizeof(float*)*bias_size);
         cnn->d_bias[i] = (float**)malloc(sizeof(float*)*bias_size);
-        cnn->last_d_bias[i] = (float**)malloc(sizeof(float*)*bias_size);
         for (int j=0; j < bias_size; j++) {
             cnn->bias[i][j] = (float*)malloc(sizeof(float)*bias_size);
             cnn->d_bias[i][j] = (float*)malloc(sizeof(float)*bias_size);
-            cnn->last_d_bias[i][j] = (float*)malloc(sizeof(float)*bias_size);
         }
     }
     create_a_cube_input_layer(network, n, depth_output, bias_size);
@@ -188,14 +183,11 @@ void add_dense(Network* network, int output_units, int activation) {
     nn->output_units = output_units;
     nn->bias = (float*)malloc(sizeof(float)*output_units);
     nn->d_bias = (float*)malloc(sizeof(float)*output_units);
-    nn->last_d_bias = (float*)malloc(sizeof(float)*output_units);
     nn->weights = (float**)malloc(sizeof(float*)*input_units);
     nn->d_weights = (float**)malloc(sizeof(float*)*input_units);
-    nn->last_d_weights = (float**)malloc(sizeof(float*)*input_units);
     for (int i=0; i < input_units; i++) {
         nn->weights[i] = (float*)malloc(sizeof(float)*output_units);
         nn->d_weights[i] = (float*)malloc(sizeof(float)*output_units);
-        nn->last_d_weights[i] = (float*)malloc(sizeof(float)*output_units);
     }
     create_a_line_input_layer(network, n, output_units);
     create_a_line_input_z_layer(network, n, output_units);
@@ -227,14 +219,11 @@ void add_dense_linearisation(Network* network, int output_units, int activation)
     nn->bias = (float*)malloc(sizeof(float)*output_units);
     nn->d_bias = (float*)malloc(sizeof(float)*output_units);
-    nn->last_d_bias = (float*)malloc(sizeof(float)*output_units);
     nn->weights = (float**)malloc(sizeof(float*)*input_units);
     nn->d_weights = (float**)malloc(sizeof(float*)*input_units);
-    nn->last_d_weights = (float**)malloc(sizeof(float*)*input_units);
     for (int i=0; i < input_units; i++) {
         nn->weights[i] = (float*)malloc(sizeof(float)*output_units);
         nn->d_weights[i] = (float*)malloc(sizeof(float)*output_units);
-        nn->last_d_weights[i] = (float*)malloc(sizeof(float)*output_units);
     }
 
     /* Not currently used
     initialisation_1d_matrix(network->initialisation, nn->bias, output_units, output_units+input_units);
diff --git a/src/cnn/free.c b/src/cnn/free.c
index c4092b6..445572e 100644
--- a/src/cnn/free.c
+++ b/src/cnn/free.c
@@ -40,34 +40,27 @@ void free_convolution(Network* network, int pos) {
         for (int j=0; j < bias_size; j++) {
             free(k_pos->bias[i][j]);
             free(k_pos->d_bias[i][j]);
-            free(k_pos->last_d_bias[i][j]);
         }
         free(k_pos->bias[i]);
         free(k_pos->d_bias[i]);
-        free(k_pos->last_d_bias[i]);
     }
     free(k_pos->bias);
     free(k_pos->d_bias);
-    free(k_pos->last_d_bias);
 
     for (int i=0; i < r; i++) {
         for (int j=0; j < c; j++) {
             for (int k=0; k < k_size; k++) {
                 free(k_pos->w[i][j][k]);
                 free(k_pos->d_w[i][j][k]);
-                free(k_pos->last_d_w[i][j][k]);
             }
             free(k_pos->w[i][j]);
             free(k_pos->d_w[i][j]);
-            free(k_pos->last_d_w[i][j]);
         }
         free(k_pos->w[i]);
         free(k_pos->d_w[i]);
-        free(k_pos->last_d_w[i]);
     }
     free(k_pos->w);
     free(k_pos->d_w);
-    free(k_pos->last_d_w);
 
     free(k_pos);
 }
@@ -79,15 +72,12 @@ void free_dense(Network* network, int pos) {
     for (int i=0; i < dim; i++) {
         free(k_pos->weights[i]);
         free(k_pos->d_weights[i]);
-        free(k_pos->last_d_weights[i]);
     }
     free(k_pos->weights);
     free(k_pos->d_weights);
-    free(k_pos->last_d_weights);
 
     free(k_pos->bias);
     free(k_pos->d_bias);
-    free(k_pos->last_d_bias);
 
     free(k_pos);
 }
@@ -99,15 +89,12 @@ void free_dense_linearisation(Network* network, int pos) {
     for (int i=0; i < dim; i++) {
         free(k_pos->weights[i]);
         free(k_pos->d_weights[i]);
-        free(k_pos->last_d_weights[i]);
     }
     free(k_pos->weights);
     free(k_pos->d_weights);
-    free(k_pos->last_d_weights);
 
     free(k_pos->bias);
     free(k_pos->d_bias);
-    free(k_pos->last_d_bias);
 
     free(k_pos);
 }
diff --git a/src/cnn/include/function.h b/src/cnn/include/function.h
index a3fdcf8..344edb2 100644
--- a/src/cnn/include/function.h
+++ b/src/cnn/include/function.h
@@ -48,6 +48,9 @@ void choose_apply_function_matrix(int activation, float*** input, int depth, int
 */
 void choose_apply_function_vector(int activation, float*** input, int dim);
 
+/*
+* Returns the activation function corresponding to its identifier (activation)
+*/
 ptr get_function_activation(int activation);
 
 #endif
\ No newline at end of file
diff --git a/src/cnn/include/struct.h b/src/cnn/include/struct.h
index d630e3a..d8c6224 100644
--- a/src/cnn/include/struct.h
+++ b/src/cnn/include/struct.h
@@ -7,10 +7,8 @@ typedef struct Kernel_cnn {
     int columns; // Depth of the output
     float*** bias; // bias[columns][dim_output][dim_output]
     float*** d_bias; // d_bias[columns][dim_output][dim_output]
-    float*** last_d_bias; // last_d_bias[columns][dim_output][dim_output]
     float**** w; // w[rows][columns][k_size][k_size]
     float**** d_w; // d_w[rows][columns][k_size][k_size]
-    float**** last_d_w; // last_d_w[rows][columns][k_size][k_size]
 } Kernel_cnn;
 
 typedef struct Kernel_nn {
@@ -18,10 +16,8 @@
     int output_units; // Number of output elements
     float* bias; // bias[output_units]
     float* d_bias; // d_bias[output_units]
-    float* last_d_bias; // last_d_bias[output_units]
     float** weights; // weights[input_units][output_units]
     float** d_weights; // d_weights[input_units][output_units]
-    float** last_d_weights; // last_d_weights[input_units][output_units]
 } Kernel_nn;
 
 typedef struct Kernel {
diff --git a/src/cnn/include/update.h b/src/cnn/include/update.h
new file mode 100644
index 0000000..345060f
--- /dev/null
+++ b/src/cnn/include/update.h
@@ -0,0 +1,27 @@
+#ifndef DEF_UPDATE_H
+#define DEF_UPDATE_H
+#include "struct.h"
+
+/*
+* Updates the weights from the data accumulated over several backpropagations,
+* then resets all d_weights to 0
+*/
+void update_weights(Network* network);
+
+/*
+* Updates the biases from the data accumulated over several backpropagations,
+* then resets all d_bias to 0
+*/
+void update_bias(Network* network);
+
+/*
+* Resets all weight backpropagation data to 0
+*/
+void reset_d_weights(Network* network);
+
+/*
+* Resets all bias backpropagation data to 0
+*/
+void reset_d_bias(Network* network);
+
+#endif
\ No newline at end of file
diff --git a/src/cnn/update.c b/src/cnn/update.c
new file mode 100644
index 0000000..aab9c8f
--- /dev/null
+++ b/src/cnn/update.c
@@ -0,0 +1,165 @@
+
+#include "include/update.h"
+
+void update_weights(Network* network) {
+    int n = network->size;
+    int input_depth, input_width, output_depth, output_width;
+    Kernel* k_i;
+    Kernel* k_i_1;
+    for (int i=0; i<(n-1); i++) {
+        k_i = network->kernel[i];
+        k_i_1 = network->kernel[i+1];
+        input_depth = network->depth[i];
+        input_width = network->width[i];
+        output_depth = network->depth[i+1];
+        output_width = network->width[i+1];
+
+        if (k_i->cnn) { // Convolution
+            Kernel_cnn* cnn = k_i_1->cnn;
+            int k_size = cnn->k_size;
+            for (int a=0; a<input_depth; a++) {
+                for (int b=0; b<output_depth; b++) {
+                    for (int c=0; c<k_size; c++) {
+                        for (int d=0; d<k_size; d++) {
+                            cnn->w[a][b][c][d] += cnn->d_w[a][b][c][d];
+                            cnn->d_w[a][b][c][d] = 0;
+                        }
+                    }
+                }
+            }
+        } else if (k_i->nn) { // Fully connected
+            if (input_depth==1) { // Vector -> Vector
+                Kernel_nn* nn = k_i_1->nn;
+                for (int a=0; a<input_width; a++) {
+                    for (int b=0; b<output_width; b++) {
+                        nn->weights[a][b] += nn->d_weights[a][b];
+                        nn->d_weights[a][b] = 0;
+                    }
+                }
+            } else { // Matrix -> vector
+                Kernel_nn* nn = k_i_1->nn;
+                int input_size = input_width*input_width*input_depth;
+                for (int a=0; a<input_size; a++) {
+                    for (int b=0; b<output_width; b++) {
+                        nn->weights[a][b] += nn->d_weights[a][b];
+                        nn->d_weights[a][b] = 0;
+                    }
+                }
+            }
+        } else { // Pooling
+            (void)0; // Nothing to do for a pooling layer
+        }
+    }
+}
+
+void update_bias(Network* network) {
+    int n = network->size;
+    int output_width, output_depth;
+    Kernel* k_i;
+    Kernel* k_i_1;
+    for (int i=0; i<(n-1); i++) {
+        k_i = network->kernel[i];
+        k_i_1 = network->kernel[i+1];
+        output_width = network->width[i+1];
+        output_depth = network->depth[i+1];
+
+        if (k_i->cnn) { // Convolution
+            Kernel_cnn* cnn = k_i_1->cnn;
+            for (int a=0; a<output_depth; a++) {
+                for (int b=0; b<output_width; b++) {
+                    for (int c=0; c<output_width; c++) {
+                        cnn->bias[a][b][c] += cnn->d_bias[a][b][c];
+                        cnn->d_bias[a][b][c] = 0;
+                    }
+                }
+            }
+        } else if (k_i->nn) { // Fully connected
+            Kernel_nn* nn = k_i_1->nn;
+            for (int a=0; a<output_width; a++) {
+                nn->bias[a] += nn->d_bias[a];
+                nn->d_bias[a] = 0;
+            }
+        } else { // Pooling
+            (void)0; // Nothing to do for a pooling layer
+        }
+    }
+}
+
+void reset_d_weights(Network* network) {
+    int n = network->size;
+    int input_depth, input_width, output_depth, output_width;
+    Kernel* k_i;
+    Kernel* k_i_1;
+    for (int i=0; i<(n-1); i++) {
+        k_i = network->kernel[i];
+        k_i_1 = network->kernel[i+1];
+        input_depth = network->depth[i];
+        input_width = network->width[i];
+        output_depth = network->depth[i+1];
+        output_width = network->width[i+1];
+
+        if (k_i->cnn) { // Convolution
+            Kernel_cnn* cnn = k_i_1->cnn;
+            int k_size = cnn->k_size;
+            for (int a=0; a<input_depth; a++) {
+                for (int b=0; b<output_depth; b++) {
+                    for (int c=0; c<k_size; c++) {
+                        for (int d=0; d<k_size; d++) {
+                            cnn->d_w[a][b][c][d] = 0;
+                        }
+                    }
+                }
+            }
+        } else if (k_i->nn) { // Fully connected
+            if (input_depth==1) { // Vector -> Vector
+                Kernel_nn* nn = k_i_1->nn;
+                for (int a=0; a<input_width; a++) {
+                    for (int b=0; b<output_width; b++) {
+                        nn->d_weights[a][b] = 0;
+                    }
+                }
+            } else { // Matrix -> vector
+                Kernel_nn* nn = k_i_1->nn;
+                int input_size = input_width*input_width*input_depth;
+                for (int a=0; a<input_size; a++) {
+                    for (int b=0; b<output_width; b++) {
+                        nn->d_weights[a][b] = 0;
+                    }
+                }
+            }
+        } else { // Pooling
+            (void)0; // Nothing to do for a pooling layer
+        }
+    }
+}
+
+void reset_d_bias(Network* network) {
+    int n = network->size;
+    int output_width, output_depth;
+    Kernel* k_i;
+    Kernel* k_i_1;
+    for (int i=0; i<(n-1); i++) {
+        k_i = network->kernel[i];
+        k_i_1 = network->kernel[i+1];
+        output_width = network->width[i+1];
+        output_depth = network->depth[i+1];
+
+        if (k_i->cnn) { // Convolution
+            Kernel_cnn* cnn = k_i_1->cnn;
+            for (int a=0; a<output_depth; a++) {
+                for (int b=0; b<output_width; b++) {
+                    for (int c=0; c<output_width; c++) {
+                        cnn->d_bias[a][b][c] = 0;
+                    }
+                }
+            }
+        } else if (k_i->nn) { // Fully connected
+            Kernel_nn* nn = k_i_1->nn;
+            for (int a=0; a<output_width; a++) {
+                nn->d_bias[a] = 0;
+            }
+        } else { // Pooling
+            (void)0; // Nothing to do for a pooling layer
+        }
+    }
+}
\ No newline at end of file
diff --git a/test/cnn_convolution.cu b/test/cnn_convolution.cu
index f092431..28c863d 100644
--- a/test/cnn_convolution.cu
+++ b/test/cnn_convolution.cu
@@ -104,16 +104,13 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
     // bias[kernel->columns][dim_output][dim_output]
     kernel->bias = create_matrix(kernel->columns, output_dim, output_dim, 15.0f);
     kernel->d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f);
-    kernel->last_d_bias = create_matrix(kernel->columns, output_dim, output_dim, 0.1f);
 
     // w[rows][columns][k_size][k_size]
     kernel->w = (float****)malloc(sizeof(float***)*kernel->rows);
    kernel->d_w = (float****)malloc(sizeof(float***)*kernel->rows);
-    kernel->last_d_w = (float****)malloc(sizeof(float***)*kernel->rows);
     for (int i=0; i < kernel->rows; i++) {
         kernel->w[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 15.0f);
         kernel->d_w[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
-        kernel->last_d_w[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 0.1f);
     }
 
     float*** input = create_matrix(kernel->rows, input_dim, input_dim, 5.0f);
@@ -151,16 +148,13 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
 
     free_matrix(kernel->bias, kernel->columns, output_dim);
     free_matrix(kernel->d_bias, kernel->columns, output_dim);
-    free_matrix(kernel->last_d_bias, kernel->columns, output_dim);
 
     for (int i=0; i < kernel->rows; i++) {
         free_matrix(kernel->w[i], kernel->columns, kernel->k_size);
         free_matrix(kernel->d_w[i], kernel->columns, kernel->k_size);
-        free_matrix(kernel->last_d_w[i], kernel->columns, kernel->k_size);
     }
     free(kernel->w);
     free(kernel->d_w);
-    free(kernel->last_d_w);
 
     free_matrix(input, kernel->rows, input_dim);
     free_matrix(output_cpu, kernel->columns, output_dim);
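
Note on the new update API: d_weights / d_bias accumulate gradient contributions over one or more backpropagation passes; update_weights / update_bias then fold them into the parameters and zero them again, while reset_d_weights / reset_d_bias discard accumulated gradients without applying them. Below is a minimal sketch of the intended call pattern, assuming hypothetical forward_propagation / backward_propagation entry points (the training loop itself is not part of this diff):

    #include "include/creation.h"
    #include "include/update.h"

    // Sketch only: accumulate gradients over a mini-batch, then apply them once.
    void train_one_batch(Network* network, float**** images, int* labels, int batch_size) {
        reset_d_weights(network); // start from clean gradient buffers
        reset_d_bias(network);
        for (int i = 0; i < batch_size; i++) {
            forward_propagation(network, images[i]);  // hypothetical placeholder
            backward_propagation(network, labels[i]); // hypothetical: accumulates into d_w / d_weights / d_bias
        }
        update_weights(network); // w += d_w, then d_w is zeroed
        update_bias(network);    // bias += d_bias, then d_bias is zeroed
    }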