diff --git a/src/cnn/creation.c b/src/cnn/creation.c
index a95344c..3c488f4 100644
--- a/src/cnn/creation.c
+++ b/src/cnn/creation.c
@@ -165,31 +165,67 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act
     cnn->columns = depth_output;
     cnn->weights = (float****)nalloc(depth_input, sizeof(float***));
     cnn->d_weights = (float****)nalloc(depth_input, sizeof(float***));
+    #ifdef ADAM_CNN_WEIGHTS
+    cnn->s_d_weights = (float****)nalloc(depth_input, sizeof(float***));
+    cnn->v_d_weights = (float****)nalloc(depth_input, sizeof(float***));
+    #endif
     for (int i=0; i < depth_input; i++) {
         cnn->weights[i] = (float***)nalloc(depth_output, sizeof(float**));
         cnn->d_weights[i] = (float***)nalloc(depth_output, sizeof(float**));
+        #ifdef ADAM_CNN_WEIGHTS
+        cnn->s_d_weights[i] = (float***)nalloc(depth_output, sizeof(float**));
+        cnn->v_d_weights[i] = (float***)nalloc(depth_output, sizeof(float**));
+        #endif
         for (int j=0; j < depth_output; j++) {
             cnn->weights[i][j] = (float**)nalloc(kernel_size, sizeof(float*));
             cnn->d_weights[i][j] = (float**)nalloc(kernel_size, sizeof(float*));
+            #ifdef ADAM_CNN_WEIGHTS
+            cnn->s_d_weights[i][j] = (float**)nalloc(kernel_size, sizeof(float*));
+            cnn->v_d_weights[i][j] = (float**)nalloc(kernel_size, sizeof(float*));
+            #endif
            for (int k=0; k < kernel_size; k++) {
                cnn->weights[i][j][k] = (float*)nalloc(kernel_size, sizeof(float));
                cnn->d_weights[i][j][k] = (float*)nalloc(kernel_size, sizeof(float));
+                #ifdef ADAM_CNN_WEIGHTS
+                cnn->s_d_weights[i][j][k] = (float*)nalloc(kernel_size, sizeof(float));
+                cnn->v_d_weights[i][j][k] = (float*)nalloc(kernel_size, sizeof(float));
+                #endif
                for (int l=0; l < kernel_size; l++) {
                    cnn->d_weights[i][j][k][l] = 0.;
+                    #ifdef ADAM_CNN_WEIGHTS
+                    cnn->s_d_weights[i][j][k][l] = 0.;
+                    cnn->v_d_weights[i][j][k][l] = 0.;
+                    #endif
                }
            }
        }
    }
    cnn->bias = (float***)nalloc(depth_output, sizeof(float**));
    cnn->d_bias = (float***)nalloc(depth_output, sizeof(float**));
+    #ifdef ADAM_CNN_BIAS
+    cnn->s_d_bias = (float***)nalloc(depth_output, sizeof(float**));
+    cnn->v_d_bias = (float***)nalloc(depth_output, sizeof(float**));
+    #endif
    for (int i=0; i < depth_output; i++) {
        cnn->bias[i] = (float**)nalloc(bias_size, sizeof(float*));
        cnn->d_bias[i] = (float**)nalloc(bias_size, sizeof(float*));
+        #ifdef ADAM_CNN_BIAS
+        cnn->s_d_bias[i] = (float**)nalloc(bias_size, sizeof(float*));
+        cnn->v_d_bias[i] = (float**)nalloc(bias_size, sizeof(float*));
+        #endif
        for (int j=0; j < bias_size; j++) {
            cnn->bias[i][j] = (float*)nalloc(bias_size, sizeof(float));
            cnn->d_bias[i][j] = (float*)nalloc(bias_size, sizeof(float));
+            #ifdef ADAM_CNN_BIAS
+            cnn->s_d_bias[i][j] = (float*)nalloc(bias_size, sizeof(float));
+            cnn->v_d_bias[i][j] = (float*)nalloc(bias_size, sizeof(float));
+            #endif
            for (int k=0; k < bias_size; k++) {
                cnn->d_bias[i][j][k] = 0.;
+                #ifdef ADAM_CNN_BIAS
+                cnn->s_d_bias[i][j][k] = 0.;
+                cnn->v_d_bias[i][j][k] = 0.;
+                #endif
            }
        }
    }
@@ -220,17 +256,37 @@ void add_dense(Network* network, int size_output, int activation) {
    nn->size_output = size_output;
    nn->bias = (float*)nalloc(size_output, sizeof(float));
    nn->d_bias = (float*)nalloc(size_output, sizeof(float));
+    #ifdef ADAM_DENSE_BIAS
+    nn->s_d_bias = (float*)nalloc(size_output, sizeof(float));
+    nn->v_d_bias = (float*)nalloc(size_output, sizeof(float));
+    #endif
    for (int i=0; i < size_output; i++) {
        nn->d_bias[i] = 0.;
+        #ifdef ADAM_DENSE_BIAS
+        nn->s_d_bias[i] = 0.;
+        nn->v_d_bias[i] = 0.;
+        #endif
    }

    nn->weights = (float**)nalloc(size_input, sizeof(float*));
    nn->d_weights = (float**)nalloc(size_input, sizeof(float*));
+    #ifdef ADAM_DENSE_WEIGHTS
+    nn->s_d_weights = (float**)nalloc(size_input, sizeof(float*));
+    nn->v_d_weights = (float**)nalloc(size_input, sizeof(float*));
+    #endif
    for (int i=0; i < size_input; i++) {
        nn->weights[i] = (float*)nalloc(size_output, sizeof(float));
        nn->d_weights[i] = (float*)nalloc(size_output, sizeof(float));
+        #ifdef ADAM_DENSE_WEIGHTS
+        nn->s_d_weights[i] = (float*)nalloc(size_output, sizeof(float));
+        nn->v_d_weights[i] = (float*)nalloc(size_output, sizeof(float));
+        #endif
        for (int j=0; j < size_output; j++) {
            nn->d_weights[i][j] = 0.;
+            #ifdef ADAM_DENSE_WEIGHTS
+            nn->s_d_weights[i][j] = 0.;
+            nn->v_d_weights[i][j] = 0.;
+            #endif
        }
    }

@@ -262,16 +318,36 @@ void add_dense_linearisation(Network* network, int size_output, int activation)
    nn->bias = (float*)nalloc(size_output, sizeof(float));
    nn->d_bias = (float*)nalloc(size_output, sizeof(float));
+    #ifdef ADAM_DENSE_BIAS
+    nn->s_d_bias = (float*)nalloc(size_output, sizeof(float));
+    nn->v_d_bias = (float*)nalloc(size_output, sizeof(float));
+    #endif
    for (int i=0; i < size_output; i++) {
        nn->d_bias[i] = 0.;
+        #ifdef ADAM_DENSE_BIAS
+        nn->s_d_bias[i] = 0.;
+        nn->v_d_bias[i] = 0.;
+        #endif
    }

    nn->weights = (float**)nalloc(size_input, sizeof(float*));
    nn->d_weights = (float**)nalloc(size_input, sizeof(float*));
+    #ifdef ADAM_DENSE_WEIGHTS
+    nn->s_d_weights = (float**)nalloc(size_input, sizeof(float*));
+    nn->v_d_weights = (float**)nalloc(size_input, sizeof(float*));
+    #endif
    for (int i=0; i < size_input; i++) {
        nn->weights[i] = (float*)nalloc(size_output, sizeof(float));
        nn->d_weights[i] = (float*)nalloc(size_output, sizeof(float));
+        #ifdef ADAM_DENSE_WEIGHTS
+        nn->s_d_weights[i] = (float*)nalloc(size_output, sizeof(float));
+        nn->v_d_weights[i] = (float*)nalloc(size_output, sizeof(float));
+        #endif
        for (int j=0; j < size_output; j++) {
            nn->d_weights[i][j] = 0.;
+            #ifdef ADAM_DENSE_WEIGHTS
+            nn->s_d_weights[i][j] = 0.;
+            nn->v_d_weights[i][j] = 0.;
+            #endif
        }
    }
    initialisation_1d_matrix(network->initialisation, nn->bias, size_output, size_input, size_output);
diff --git a/src/cnn/free.c b/src/cnn/free.c
index b9a4943..539b78e 100644
--- a/src/cnn/free.c
+++ b/src/cnn/free.c
@@ -42,27 +42,55 @@ void free_convolution(Network* network, int pos) {
        for (int j=0; j < bias_size; j++) {
            gree(k_pos->bias[i][j]);
            gree(k_pos->d_bias[i][j]);
+            #ifdef ADAM_CNN_BIAS
+            gree(k_pos->s_d_bias[i][j]);
+            gree(k_pos->v_d_bias[i][j]);
+            #endif
        }
        gree(k_pos->bias[i]);
        gree(k_pos->d_bias[i]);
+        #ifdef ADAM_CNN_BIAS
+        gree(k_pos->s_d_bias[i]);
+        gree(k_pos->v_d_bias[i]);
+        #endif
    }
    gree(k_pos->bias);
    gree(k_pos->d_bias);
+    #ifdef ADAM_CNN_BIAS
+    gree(k_pos->s_d_bias);
+    gree(k_pos->v_d_bias);
+    #endif

    for (int i=0; i < r; i++) {
        for (int j=0; j < c; j++) {
            for (int k=0; k < k_size; k++) {
                gree(k_pos->weights[i][j][k]);
                gree(k_pos->d_weights[i][j][k]);
+                #ifdef ADAM_CNN_WEIGHTS
+                gree(k_pos->s_d_weights[i][j][k]);
+                gree(k_pos->v_d_weights[i][j][k]);
+                #endif
            }
            gree(k_pos->weights[i][j]);
            gree(k_pos->d_weights[i][j]);
+            #ifdef ADAM_CNN_WEIGHTS
+            gree(k_pos->s_d_weights[i][j]);
+            gree(k_pos->v_d_weights[i][j]);
+            #endif
        }
        gree(k_pos->weights[i]);
        gree(k_pos->d_weights[i]);
+        #ifdef ADAM_CNN_WEIGHTS
+        gree(k_pos->s_d_weights[i]);
+        gree(k_pos->v_d_weights[i]);
+        #endif
    }
    gree(k_pos->weights);
    gree(k_pos->d_weights);
+    #ifdef ADAM_CNN_WEIGHTS
+    gree(k_pos->s_d_weights);
+    gree(k_pos->v_d_weights);
+    #endif

    gree(k_pos);
}
@@ -74,12 +102,24 @@ void free_dense(Network* network, int pos) {
    for (int i=0; i < dim; i++) {
        gree(k_pos->weights[i]);
        gree(k_pos->d_weights[i]);
+        #ifdef ADAM_DENSE_WEIGHTS
+        gree(k_pos->s_d_weights[i]);
+        gree(k_pos->v_d_weights[i]);
+        #endif
    }
    gree(k_pos->weights);
    gree(k_pos->d_weights);
+    #ifdef ADAM_DENSE_WEIGHTS
+    gree(k_pos->s_d_weights);
+    gree(k_pos->v_d_weights);
+    #endif

    gree(k_pos->bias);
    gree(k_pos->d_bias);
+    #ifdef ADAM_DENSE_BIAS
+    gree(k_pos->s_d_bias);
+    gree(k_pos->v_d_bias);
+    #endif

    gree(k_pos);
}
@@ -91,12 +131,24 @@ void free_dense_linearisation(Network* network, int pos) {
    for (int i=0; i < dim; i++) {
        gree(k_pos->weights[i]);
        gree(k_pos->d_weights[i]);
+        #ifdef ADAM_DENSE_WEIGHTS
+        gree(k_pos->s_d_weights[i]);
+        gree(k_pos->v_d_weights[i]);
+        #endif
    }
    gree(k_pos->weights);
    gree(k_pos->d_weights);
+    #ifdef ADAM_DENSE_WEIGHTS
+    gree(k_pos->s_d_weights);
+    gree(k_pos->v_d_weights);
+    #endif

    gree(k_pos->bias);
    gree(k_pos->d_bias);
+    #ifdef ADAM_DENSE_BIAS
+    gree(k_pos->s_d_bias);
+    gree(k_pos->v_d_bias);
+    #endif

    gree(k_pos);
}
diff --git a/src/cnn/include/config.h b/src/cnn/include/config.h
index 92daf00..6c4535e 100644
--- a/src/cnn/include/config.h
+++ b/src/cnn/include/config.h
@@ -1,12 +1,27 @@
 #ifndef DEF_CONFIG_H
 #define DEF_CONFIG_H
+
 //* Training parameters
 #define EPOCHS 10           // Default number of epochs (iterations over all the images)
 #define BATCHES 32          // Number of images to process before updating the network
 #define LEARNING_RATE 3e-4  // Learning rate
 #define USE_MULTITHREADING  // Comment out to use a single core during training (better for batches that are processed quickly)

+//* Adam optimizer parameters
+#define ALPHA 3e-4
+#define BETA_1 0.9
+#define BETA_2 0.999
+#define EPSILON 1e-7
+
+//* Adam optimizer options
+//* Enable or disable Adam on the dense layers
+//#define ADAM_DENSE_WEIGHTS
+//#define ADAM_DENSE_BIAS
+//* Enable or disable Adam on the convolutional layers
+//#define ADAM_CNN_WEIGHTS
+//#define ADAM_CNN_BIAS
+
 //* Optimization parameter for a JPEG dataset
 // keep images in RAM instead of re-reading and decompressing them each time
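For reference, with ALPHA = α, BETA_1 = β₁, BETA_2 = β₂ and EPSILON = ε, the per-parameter step these constants feed (see src/cnn/update.c below) is

    v_t = \beta_1 v_{t-1} + (1 - \beta_1)\, g_t
    s_t = \beta_2 s_{t-1} + (1 - \beta_2)\, g_t^2
    \theta_t = \theta_{t-1} - \alpha\, v_t / \sqrt{s_t + \epsilon}

where g_t is the gradient accumulated for that parameter over a batch. Two departures from the reference Adam of Kingma & Ba are worth flagging: the bias-correction factors 1/(1 - β₁^t) and 1/(1 - β₂^t) are omitted, which mainly changes the size of the early updates (on the order of 1/(1 - β₂) = 1000 steps, while the moments are still near their zero initialisation), and ε is added under the square root rather than to its result.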
diff --git a/src/cnn/include/struct.h b/src/cnn/include/struct.h
index 90df7eb..1f04c43 100644
--- a/src/cnn/include/struct.h
+++ b/src/cnn/include/struct.h
@@ -1,6 +1,8 @@
 #ifndef DEF_STRUCT_H
 #define DEF_STRUCT_H
+#include "config.h"
+
 #define NO_POOLING 0
 #define AVG_POOLING 1
 #define MAX_POOLING 2
@@ -15,8 +17,16 @@ typedef struct Kernel_cnn {
     int columns; // Depth of the output
     float*** bias; // bias[columns][dim_output][dim_output]
     float*** d_bias; // d_bias[columns][dim_output][dim_output]
+    #ifdef ADAM_CNN_BIAS
+    float*** s_d_bias; // s_d_bias[columns][dim_output][dim_output]
+    float*** v_d_bias; // v_d_bias[columns][dim_output][dim_output]
+    #endif
     float**** weights; // weights[rows][columns][k_size][k_size]
     float**** d_weights; // d_weights[rows][columns][k_size][k_size]
+    #ifdef ADAM_CNN_WEIGHTS
+    float**** s_d_weights; // s_d_weights[rows][columns][k_size][k_size]
+    float**** v_d_weights; // v_d_weights[rows][columns][k_size][k_size]
+    #endif
 } Kernel_cnn;

 typedef struct Kernel_nn {
@@ -25,8 +35,16 @@ typedef struct Kernel_nn {
     int size_output; // Number of output elements
     float* bias; // bias[size_output]
     float* d_bias; // d_bias[size_output]
+    #ifdef ADAM_DENSE_BIAS
+    float* s_d_bias; // s_d_bias[size_output]
+    float* v_d_bias; // v_d_bias[size_output]
+    #endif
     float** weights; // weights[size_input][size_output]
     float** d_weights; // d_weights[size_input][size_output]
+    #ifdef ADAM_DENSE_WEIGHTS
+    float** s_d_weights; // s_d_weights[size_input][size_output]
+    float** v_d_weights; // v_d_weights[size_input][size_output]
+    #endif
 } Kernel_nn;

 typedef struct Kernel {
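Making struct.h include config.h is load-bearing: the s_/v_ fields are gated on preprocessor flags, so every translation unit that touches Kernel_cnn or Kernel_nn must see exactly the same ADAM_* definitions, or the object files will disagree on struct sizes and field offsets. A minimal sketch of the hazard, with a hypothetical file name:

    /* a.c (hypothetical): if this file were compiled with -DADAM_DENSE_BIAS
     * while another .c file including struct.h were not, the two would
     * compute different values for sizeof(Kernel_nn) and different offsets
     * for `weights`, and any Kernel_nn* passed between them would be
     * misread. Including config.h from struct.h keeps the flags identical
     * everywhere the types are used. */
    #include <stddef.h>
    #include "include/struct.h"

    size_t kernel_nn_size(void) {
        return sizeof(Kernel_nn); // must be the same value in every object file
    }

It also means that toggling any ADAM_* flag requires rebuilding the whole project, not just update.c.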
diff --git a/src/cnn/neuron_io.c b/src/cnn/neuron_io.c
index c29fdc1..a009f59 100644
--- a/src/cnn/neuron_io.c
+++ b/src/cnn/neuron_io.c
@@ -249,35 +249,71 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) {
        cnn->bias = (float***)nalloc(cnn->columns, sizeof(float**));
        cnn->d_bias = (float***)nalloc(cnn->columns, sizeof(float**));
+        #ifdef ADAM_CNN_BIAS
+        cnn->s_d_bias = (float***)nalloc(cnn->columns, sizeof(float**));
+        cnn->v_d_bias = (float***)nalloc(cnn->columns, sizeof(float**));
+        #endif
        for (int i=0; i < cnn->columns; i++) {
            cnn->bias[i] = (float**)nalloc(output_dim, sizeof(float*));
            cnn->d_bias[i] = (float**)nalloc(output_dim, sizeof(float*));
+            #ifdef ADAM_CNN_BIAS
+            cnn->s_d_bias[i] = (float**)nalloc(output_dim, sizeof(float*));
+            cnn->v_d_bias[i] = (float**)nalloc(output_dim, sizeof(float*));
+            #endif
            for (int j=0; j < output_dim; j++) {
                cnn->bias[i][j] = (float*)nalloc(output_dim, sizeof(float));
                cnn->d_bias[i][j] = (float*)nalloc(output_dim, sizeof(float));
+                #ifdef ADAM_CNN_BIAS
+                cnn->s_d_bias[i][j] = (float*)nalloc(output_dim, sizeof(float));
+                cnn->v_d_bias[i][j] = (float*)nalloc(output_dim, sizeof(float));
+                #endif
                for (int k=0; k < output_dim; k++) {
                    (void) !fread(&tmp, sizeof(tmp), 1, ptr);
                    cnn->bias[i][j][k] = tmp;
                    cnn->d_bias[i][j][k] = 0.;
+                    #ifdef ADAM_CNN_BIAS
+                    cnn->s_d_bias[i][j][k] = 0.;
+                    cnn->v_d_bias[i][j][k] = 0.;
+                    #endif
                }
            }
        }

        cnn->weights = (float****)nalloc(cnn->rows, sizeof(float***));
        cnn->d_weights = (float****)nalloc(cnn->rows, sizeof(float***));
+        #ifdef ADAM_CNN_WEIGHTS
+        cnn->s_d_weights = (float****)nalloc(cnn->rows, sizeof(float***));
+        cnn->v_d_weights = (float****)nalloc(cnn->rows, sizeof(float***));
+        #endif
        for (int i=0; i < cnn->rows; i++) {
            cnn->weights[i] = (float***)nalloc(cnn->columns, sizeof(float**));
            cnn->d_weights[i] = (float***)nalloc(cnn->columns, sizeof(float**));
+            #ifdef ADAM_CNN_WEIGHTS
+            cnn->s_d_weights[i] = (float***)nalloc(cnn->columns, sizeof(float**));
+            cnn->v_d_weights[i] = (float***)nalloc(cnn->columns, sizeof(float**));
+            #endif
            for (int j=0; j < cnn->columns; j++) {
                cnn->weights[i][j] = (float**)nalloc(cnn->k_size, sizeof(float*));
                cnn->d_weights[i][j] = (float**)nalloc(cnn->k_size, sizeof(float*));
+                #ifdef ADAM_CNN_WEIGHTS
+                cnn->s_d_weights[i][j] = (float**)nalloc(cnn->k_size, sizeof(float*));
+                cnn->v_d_weights[i][j] = (float**)nalloc(cnn->k_size, sizeof(float*));
+                #endif
                for (int k=0; k < cnn->k_size; k++) {
                    cnn->weights[i][j][k] = (float*)nalloc(cnn->k_size, sizeof(float));
                    cnn->d_weights[i][j][k] = (float*)nalloc(cnn->k_size, sizeof(float));
+                    #ifdef ADAM_CNN_WEIGHTS
+                    cnn->s_d_weights[i][j][k] = (float*)nalloc(cnn->k_size, sizeof(float));
+                    cnn->v_d_weights[i][j][k] = (float*)nalloc(cnn->k_size, sizeof(float));
+                    #endif
                    for (int l=0; l < cnn->k_size; l++) {
                        (void) !fread(&tmp, sizeof(tmp), 1, ptr);
                        cnn->weights[i][j][k][l] = tmp;
                        cnn->d_weights[i][j][k][l] = 0.;
+                        #ifdef ADAM_CNN_WEIGHTS
+                        cnn->s_d_weights[i][j][k][l] = 0.;
+                        cnn->v_d_weights[i][j][k][l] = 0.;
+                        #endif
                    }
                }
            }
@@ -300,21 +336,41 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) {

        nn->bias = (float*)nalloc(nn->size_output, sizeof(float));
        nn->d_bias = (float*)nalloc(nn->size_output, sizeof(float));
+        #ifdef ADAM_DENSE_BIAS
+        nn->s_d_bias = (float*)nalloc(nn->size_output, sizeof(float));
+        nn->v_d_bias = (float*)nalloc(nn->size_output, sizeof(float));
+        #endif
        for (int i=0; i < nn->size_output; i++) {
            (void) !fread(&tmp, sizeof(tmp), 1, ptr);
            nn->bias[i] = tmp;
            nn->d_bias[i] = 0.;
+            #ifdef ADAM_DENSE_BIAS
+            nn->s_d_bias[i] = 0.;
+            nn->v_d_bias[i] = 0.;
+            #endif
        }

        nn->weights = (float**)nalloc(nn->size_input, sizeof(float*));
        nn->d_weights = (float**)nalloc(nn->size_input, sizeof(float*));
+        #ifdef ADAM_DENSE_WEIGHTS
+        nn->s_d_weights = (float**)nalloc(nn->size_input, sizeof(float*));
+        nn->v_d_weights = (float**)nalloc(nn->size_input, sizeof(float*));
+        #endif
        for (int i=0; i < nn->size_input; i++) {
            nn->weights[i] = (float*)nalloc(nn->size_output, sizeof(float));
            nn->d_weights[i] = (float*)nalloc(nn->size_output, sizeof(float));
+            #ifdef ADAM_DENSE_WEIGHTS
+            nn->s_d_weights[i] = (float*)nalloc(nn->size_output, sizeof(float));
+            nn->v_d_weights[i] = (float*)nalloc(nn->size_output, sizeof(float));
+            #endif
            for (int j=0; j < nn->size_output; j++) {
                (void) !fread(&tmp, sizeof(tmp), 1, ptr);
                nn->weights[i][j] = tmp;
                nn->d_weights[i][j] = 0.;
+                #ifdef ADAM_DENSE_WEIGHTS
+                nn->s_d_weights[i][j] = 0.;
+                nn->v_d_weights[i][j] = 0.;
+                #endif
            }
        }
    } else if (type_couche == POOLING) { // Pooling layer case
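Note that read_kernel zeroes the moment buffers instead of reading them: the serialised format stores only weights and biases, so a reloaded network restarts Adam from fresh accumulators. If preserving optimizer state across restarts ever mattered, a hypothetical extension (not part of this patch; it would need matching fwrite calls on the save path and a format version bump) could reuse the fread idiom the file already uses for the weights:

    /* Hypothetical: restore dense-layer moments right after the weights. */
    #ifdef ADAM_DENSE_WEIGHTS
    for (int i=0; i < nn->size_input; i++) {
        for (int j=0; j < nn->size_output; j++) {
            (void) !fread(&tmp, sizeof(tmp), 1, ptr);
            nn->s_d_weights[i][j] = tmp;
            (void) !fread(&tmp, sizeof(tmp), 1, ptr);
            nn->v_d_weights[i][j] = tmp;
        }
    }
    #endif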
diff --git a/src/cnn/update.c b/src/cnn/update.c
index 5a1f572..f8fa448 100644
--- a/src/cnn/update.c
+++ b/src/cnn/update.c
@@ -1,8 +1,12 @@
 #include <stdio.h>
+#include <math.h>
+#include <float.h>

 #include "include/update.h"
 #include "include/struct.h"
+#include "include/config.h"
+

 float clip(float a) {
     if (a > NETWORK_CLIP_VALUE) {
         return NETWORK_CLIP_VALUE;
@@ -34,7 +38,13 @@ void update_weights(Network* network, Network* d_network) {
            for (int b=0; b < output_depth; b++) {
                for (int c=0; c < k_size; c++) {
                    for (int d=0; d < k_size; d++) {
+                        #ifdef ADAM_CNN_WEIGHTS
+                        d_cnn->v_d_weights[a][b][c][d] = BETA_1*d_cnn->v_d_weights[a][b][c][d] + (1-BETA_1)*d_cnn->d_weights[a][b][c][d];
+                        d_cnn->s_d_weights[a][b][c][d] = BETA_2*d_cnn->s_d_weights[a][b][c][d] + (1-BETA_2)*d_cnn->d_weights[a][b][c][d]*d_cnn->d_weights[a][b][c][d];
+                        cnn->weights[a][b][c][d] -= ALPHA*(d_cnn->v_d_weights[a][b][c][d]/sqrt(d_cnn->s_d_weights[a][b][c][d]+EPSILON));
+                        #else
                        cnn->weights[a][b][c][d] -= network->learning_rate * d_cnn->d_weights[a][b][c][d];
+                        #endif
                        d_cnn->d_weights[a][b][c][d] = 0;

                        cnn->weights[a][b][c][d] = clip(cnn->weights[a][b][c][d]);
@@ -49,7 +59,13 @@ void update_weights(Network* network, Network* d_network) {

        for (int a=0; a < input_width; a++) {
            for (int b=0; b < output_width; b++) {
+                #ifdef ADAM_DENSE_WEIGHTS
+                d_nn->v_d_weights[a][b] = BETA_1*d_nn->v_d_weights[a][b] + (1-BETA_1)*d_nn->d_weights[a][b];
+                d_nn->s_d_weights[a][b] = BETA_2*d_nn->s_d_weights[a][b] + (1-BETA_2)*d_nn->d_weights[a][b]*d_nn->d_weights[a][b];
+                nn->weights[a][b] -= ALPHA*(d_nn->v_d_weights[a][b]/sqrt(d_nn->s_d_weights[a][b]+EPSILON));
+                #else
                nn->weights[a][b] -= network->learning_rate * d_nn->d_weights[a][b];
+                #endif
                d_nn->d_weights[a][b] = 0;
            }
        }
@@ -61,7 +77,13 @@ void update_weights(Network* network, Network* d_network) {

            for (int a=0; a < size_input; a++) {
                for (int b=0; b < output_width; b++) {
+                    #ifdef ADAM_DENSE_WEIGHTS
+                    d_nn->v_d_weights[a][b] = BETA_1*d_nn->v_d_weights[a][b] + (1-BETA_1)*d_nn->d_weights[a][b];
+                    d_nn->s_d_weights[a][b] = BETA_2*d_nn->s_d_weights[a][b] + (1-BETA_2)*d_nn->d_weights[a][b]*d_nn->d_weights[a][b];
+                    nn->weights[a][b] -= ALPHA*(d_nn->v_d_weights[a][b]/sqrt(d_nn->s_d_weights[a][b]+EPSILON));
+                    #else
                    nn->weights[a][b] -= network->learning_rate * d_nn->d_weights[a][b];
+                    #endif
                    d_nn->d_weights[a][b] = 0;

                    nn->weights[a][b] = clip(nn->weights[a][b]);
@@ -89,7 +111,13 @@ void update_bias(Network* network, Network* d_network) {
        for (int a=0; a < output_depth; a++) {
            for (int b=0; b < output_width; b++) {
                for (int c=0; c < output_width; c++) {
+                    #ifdef ADAM_CNN_BIAS
+                    d_cnn->v_d_bias[a][b][c] = BETA_1*d_cnn->v_d_bias[a][b][c] + (1-BETA_1)*d_cnn->d_bias[a][b][c];
+                    d_cnn->s_d_bias[a][b][c] = BETA_2*d_cnn->s_d_bias[a][b][c] + (1-BETA_2)*d_cnn->d_bias[a][b][c]*d_cnn->d_bias[a][b][c];
+                    cnn->bias[a][b][c] -= ALPHA*(d_cnn->v_d_bias[a][b][c]/sqrt(d_cnn->s_d_bias[a][b][c]+EPSILON));
+                    #else
                    cnn->bias[a][b][c] -= network->learning_rate * d_cnn->d_bias[a][b][c];
+                    #endif
                    d_cnn->d_bias[a][b][c] = 0;

                    cnn->bias[a][b][c] = clip(cnn->bias[a][b][c]);
@@ -101,7 +129,13 @@ void update_bias(Network* network, Network* d_network) {
            Kernel_nn* d_nn = dk_i->nn;

            for (int a=0; a < output_width; a++) {
+                #ifdef ADAM_DENSE_BIAS
+                d_nn->v_d_bias[a] = BETA_1*d_nn->v_d_bias[a] + (1-BETA_1)*d_nn->d_bias[a];
+                d_nn->s_d_bias[a] = BETA_2*d_nn->s_d_bias[a] + (1-BETA_2)*d_nn->d_bias[a]*d_nn->d_bias[a];
+                nn->bias[a] -= ALPHA*(d_nn->v_d_bias[a]/sqrt(d_nn->s_d_bias[a]+EPSILON));
+                #else
                nn->bias[a] -= network->learning_rate * d_nn->d_bias[a];
+                #endif
                d_nn->d_bias[a] = 0;

                nn->bias[a] = clip(nn->bias[a]);
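Condensed to a single parameter, the step applied above under the ADAM_* flags is the following; a minimal self-contained sketch with the constants inlined from config.h:

    #include <math.h>

    #define ALPHA   3e-4
    #define BETA_1  0.9
    #define BETA_2  0.999
    #define EPSILON 1e-7

    /* One Adam step for a parameter w with batch-accumulated gradient g;
     * v and s are its moment accumulators, zero-initialised at creation. */
    static float adam_step(float w, float g, float *v, float *s) {
        *v = BETA_1 * (*v) + (1 - BETA_1) * g;     // first moment (momentum)
        *s = BETA_2 * (*s) + (1 - BETA_2) * g * g; // second moment (RMS)
        return w - ALPHA * (*v) / sqrtf(*s + EPSILON);
    }

In update.c the accumulated gradient is then reset to 0 for the next batch, and the new value is passed through clip() wherever the plain-SGD branch already did so.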
diff --git a/src/cnn/utils.c b/src/cnn/utils.c
index 1a211ba..8de8b51 100644
--- a/src/cnn/utils.c
+++ b/src/cnn/utils.c
@@ -148,19 +148,39 @@ Network* copy_network(Network* network) {
            network_cp->kernel[i]->nn->bias = (float*)nalloc(size_output, sizeof(float));
            network_cp->kernel[i]->nn->d_bias = (float*)nalloc(size_output, sizeof(float));
+            #ifdef ADAM_DENSE_BIAS
+            network_cp->kernel[i]->nn->s_d_bias = (float*)nalloc(size_output, sizeof(float));
+            network_cp->kernel[i]->nn->v_d_bias = (float*)nalloc(size_output, sizeof(float));
+            #endif

            for (int j=0; j < size_output; j++) {
                copyVar(kernel[i]->nn->bias[j]);
                network_cp->kernel[i]->nn->d_bias[j] = 0.;
+                #ifdef ADAM_DENSE_BIAS
+                network_cp->kernel[i]->nn->s_d_bias[j] = 0.;
+                network_cp->kernel[i]->nn->v_d_bias[j] = 0.;
+                #endif
            }

            network_cp->kernel[i]->nn->weights = (float**)nalloc(size_input, sizeof(float*));
            network_cp->kernel[i]->nn->d_weights = (float**)nalloc(size_input, sizeof(float*));
+            #ifdef ADAM_DENSE_WEIGHTS
+            network_cp->kernel[i]->nn->s_d_weights = (float**)nalloc(size_input, sizeof(float*));
+            network_cp->kernel[i]->nn->v_d_weights = (float**)nalloc(size_input, sizeof(float*));
+            #endif
            for (int j=0; j < size_input; j++) {
                network_cp->kernel[i]->nn->weights[j] = (float*)nalloc(size_output, sizeof(float));
                network_cp->kernel[i]->nn->d_weights[j] = (float*)nalloc(size_output, sizeof(float));
+                #ifdef ADAM_DENSE_WEIGHTS
+                network_cp->kernel[i]->nn->s_d_weights[j] = (float*)nalloc(size_output, sizeof(float));
+                network_cp->kernel[i]->nn->v_d_weights[j] = (float*)nalloc(size_output, sizeof(float));
+                #endif
                for (int k=0; k < size_output; k++) {
                    copyVar(kernel[i]->nn->weights[j][k]);
                    network_cp->kernel[i]->nn->d_weights[j][k] = 0.;
+                    #ifdef ADAM_DENSE_WEIGHTS
+                    network_cp->kernel[i]->nn->s_d_weights[j][k] = 0.;
+                    network_cp->kernel[i]->nn->v_d_weights[j][k] = 0.;
+                    #endif
                }
            }
        }
@@ -184,33 +204,69 @@ Network* copy_network(Network* network) {
            network_cp->kernel[i]->cnn->bias = (float***)nalloc(columns, sizeof(float**));
            network_cp->kernel[i]->cnn->d_bias = (float***)nalloc(columns, sizeof(float**));
+            #ifdef ADAM_CNN_BIAS
+            network_cp->kernel[i]->cnn->s_d_bias = (float***)nalloc(columns, sizeof(float**));
+            network_cp->kernel[i]->cnn->v_d_bias = (float***)nalloc(columns, sizeof(float**));
+            #endif
            for (int j=0; j < columns; j++) {
                network_cp->kernel[i]->cnn->bias[j] = (float**)nalloc(output_dim, sizeof(float*));
                network_cp->kernel[i]->cnn->d_bias[j] = (float**)nalloc(output_dim, sizeof(float*));
+                #ifdef ADAM_CNN_BIAS
+                network_cp->kernel[i]->cnn->s_d_bias[j] = (float**)nalloc(output_dim, sizeof(float*));
+                network_cp->kernel[i]->cnn->v_d_bias[j] = (float**)nalloc(output_dim, sizeof(float*));
+                #endif
                for (int k=0; k < output_dim; k++) {
                    network_cp->kernel[i]->cnn->bias[j][k] = (float*)nalloc(output_dim, sizeof(float));
                    network_cp->kernel[i]->cnn->d_bias[j][k] = (float*)nalloc(output_dim, sizeof(float));
+                    #ifdef ADAM_CNN_BIAS
+                    network_cp->kernel[i]->cnn->s_d_bias[j][k] = (float*)nalloc(output_dim, sizeof(float));
+                    network_cp->kernel[i]->cnn->v_d_bias[j][k] = (float*)nalloc(output_dim, sizeof(float));
+                    #endif
                    for (int l=0; l < output_dim; l++) {
                        copyVar(kernel[i]->cnn->bias[j][k][l]);
                        network_cp->kernel[i]->cnn->d_bias[j][k][l] = 0.;
+                        #ifdef ADAM_CNN_BIAS
+                        network_cp->kernel[i]->cnn->s_d_bias[j][k][l] = 0.;
+                        network_cp->kernel[i]->cnn->v_d_bias[j][k][l] = 0.;
+                        #endif
                    }
                }
            }

            network_cp->kernel[i]->cnn->weights = (float****)nalloc(rows, sizeof(float***));
            network_cp->kernel[i]->cnn->d_weights = (float****)nalloc(rows, sizeof(float***));
+            #ifdef ADAM_CNN_WEIGHTS
+            network_cp->kernel[i]->cnn->s_d_weights = (float****)nalloc(rows, sizeof(float***));
+            network_cp->kernel[i]->cnn->v_d_weights = (float****)nalloc(rows, sizeof(float***));
+            #endif
            for (int j=0; j < rows; j++) {
                network_cp->kernel[i]->cnn->weights[j] = (float***)nalloc(columns, sizeof(float**));
                network_cp->kernel[i]->cnn->d_weights[j] = (float***)nalloc(columns, sizeof(float**));
+                #ifdef ADAM_CNN_WEIGHTS
+                network_cp->kernel[i]->cnn->s_d_weights[j] = (float***)nalloc(columns, sizeof(float**));
+                network_cp->kernel[i]->cnn->v_d_weights[j] = (float***)nalloc(columns, sizeof(float**));
+                #endif
                for (int k=0; k < columns; k++) {
                    network_cp->kernel[i]->cnn->weights[j][k] = (float**)nalloc(k_size, sizeof(float*));
                    network_cp->kernel[i]->cnn->d_weights[j][k] = (float**)nalloc(k_size, sizeof(float*));
+                    #ifdef ADAM_CNN_WEIGHTS
+                    network_cp->kernel[i]->cnn->s_d_weights[j][k] = (float**)nalloc(k_size, sizeof(float*));
+                    network_cp->kernel[i]->cnn->v_d_weights[j][k] = (float**)nalloc(k_size, sizeof(float*));
+                    #endif
                    for (int l=0; l < k_size; l++) {
                        network_cp->kernel[i]->cnn->weights[j][k][l] = (float*)nalloc(k_size, sizeof(float));
                        network_cp->kernel[i]->cnn->d_weights[j][k][l] = (float*)nalloc(k_size, sizeof(float));
+                        #ifdef ADAM_CNN_WEIGHTS
+                        network_cp->kernel[i]->cnn->s_d_weights[j][k][l] = (float*)nalloc(k_size, sizeof(float));
+                        network_cp->kernel[i]->cnn->v_d_weights[j][k][l] = (float*)nalloc(k_size, sizeof(float));
+                        #endif
                        for (int m=0; m < k_size; m++) {
                            copyVar(kernel[i]->cnn->weights[j][k][l][m]);
                            network_cp->kernel[i]->cnn->d_weights[j][k][l][m] = 0.;
+                            #ifdef ADAM_CNN_WEIGHTS
+                            network_cp->kernel[i]->cnn->s_d_weights[j][k][l][m] = 0.;
+                            network_cp->kernel[i]->cnn->v_d_weights[j][k][l][m] = 0.;
+                            #endif
                        }
                    }
                }
diff --git a/src/scripts/convolution_benchmark.cu b/src/scripts/convolution_benchmark.cu
index 11b5b03..4650cad 100644
--- a/src/scripts/convolution_benchmark.cu
+++ b/src/scripts/convolution_benchmark.cu
@@ -1,3 +1,5 @@
+//! This file uses an old implementation of convolution based on linearised matrices.
+//! It is therefore no longer compatible, and no longer compiles.
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>
@@ -114,17 +116,33 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
    // bias[kernel->columns]
    kernel->bias = (float*)malloc(kernel->columns * sizeof(float));
    kernel->d_bias = (float*)malloc(kernel->columns * sizeof(float));
+    #ifdef ADAM_CNN_BIAS
+    kernel->s_d_bias = (float*)malloc(kernel->columns * sizeof(float));
+    kernel->v_d_bias = (float*)malloc(kernel->columns * sizeof(float));
+    #endif
    for (int i=0; i < kernel->columns; i++) {
        kernel->bias[i] = random_float(0.0f, 15.0f);
        kernel->d_bias[i] = random_float(0.0f, 1.5f);
+        #ifdef ADAM_CNN_BIAS
+        kernel->s_d_bias[i] = random_float(0.0f, 1.5f);
+        kernel->v_d_bias[i] = random_float(0.0f, 1.5f);
+        #endif
    }

    // weights[rows][columns][k_size][k_size]
    kernel->weights = (float****)malloc(sizeof(float***)*kernel->rows);
    kernel->d_weights = (float****)malloc(sizeof(float***)*kernel->rows);
+    #ifdef ADAM_CNN_WEIGHTS
+    kernel->s_d_weights = (float****)malloc(sizeof(float***)*kernel->rows);
+    kernel->v_d_weights = (float****)malloc(sizeof(float***)*kernel->rows);
+    #endif
    for (int i=0; i < kernel->rows; i++) {
        kernel->weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 15.0f);
        kernel->d_weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
+        #ifdef ADAM_CNN_WEIGHTS
+        kernel->s_d_weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
+        kernel->v_d_weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
+        #endif
    }

    float*** input = create_matrix(kernel->rows, input_dim, input_dim, 5.0f);
@@ -162,13 +180,25 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)

    free(kernel->bias);
    free(kernel->d_bias);
+    #ifdef ADAM_CNN_BIAS
+    free(kernel->s_d_bias);
+    free(kernel->v_d_bias);
+    #endif

    for (int i=0; i < kernel->rows; i++) {
        free_matrix(kernel->weights[i], kernel->columns, kernel->k_size);
        free_matrix(kernel->d_weights[i], kernel->columns, kernel->k_size);
+        #ifdef ADAM_CNN_WEIGHTS
+        free_matrix(kernel->s_d_weights[i], kernel->columns, kernel->k_size);
+        free_matrix(kernel->v_d_weights[i], kernel->columns, kernel->k_size);
+        #endif
    }
    free(kernel->weights);
    free(kernel->d_weights);
+    #ifdef ADAM_CNN_WEIGHTS
+    free(kernel->s_d_weights);
+    free(kernel->v_d_weights);
+    #endif

    free_matrix(input, kernel->rows, input_dim);
    free_matrix(output_cpu, kernel->columns, output_dim);
diff --git a/test/cnn_convolution.cu b/test/cnn_convolution.cu
index 049657b..9716e62 100644
--- a/test/cnn_convolution.cu
+++ b/test/cnn_convolution.cu
@@ -107,13 +107,25 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
    // bias[kernel->columns][dim_output][dim_output]
    kernel->bias = create_matrix(kernel->columns, output_dim, output_dim, 15.0f);
    kernel->d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f);
+    #ifdef ADAM_CNN_BIAS
+    kernel->s_d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f);
+    kernel->v_d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f);
+    #endif

    // weights[rows][columns][k_size][k_size]
    kernel->weights = (float****)nalloc(kernel->rows, sizeof(float***));
    kernel->d_weights = (float****)nalloc(kernel->rows, sizeof(float***));
+    #ifdef ADAM_CNN_WEIGHTS
+    kernel->s_d_weights = (float****)nalloc(kernel->rows, sizeof(float***));
+    kernel->v_d_weights = (float****)nalloc(kernel->rows, sizeof(float***));
+    #endif
    for (int i=0; i < kernel->rows; i++) {
        kernel->weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 15.0f);
        kernel->d_weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
+        #ifdef ADAM_CNN_WEIGHTS
+        kernel->s_d_weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
+        kernel->v_d_weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
+        #endif
    }

    float*** input = create_matrix(kernel->rows, input_dim, input_dim, 5.0f);
@@ -152,13 +164,25 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)

    free_matrix(kernel->bias, kernel->columns, output_dim);
    free_matrix(kernel->d_bias, kernel->columns, output_dim);
+    #ifdef ADAM_CNN_BIAS
+    free_matrix(kernel->s_d_bias, kernel->columns, output_dim);
+    free_matrix(kernel->v_d_bias, kernel->columns, output_dim);
+    #endif

    for (int i=0; i < kernel->rows; i++) {
        free_matrix(kernel->weights[i], kernel->columns, kernel->k_size);
        free_matrix(kernel->d_weights[i], kernel->columns, kernel->k_size);
+        #ifdef ADAM_CNN_WEIGHTS
+        free_matrix(kernel->s_d_weights[i], kernel->columns, kernel->k_size);
+        free_matrix(kernel->v_d_weights[i], kernel->columns, kernel->k_size);
+        #endif
    }
    gree(kernel->weights);
    gree(kernel->d_weights);
+    #ifdef ADAM_CNN_WEIGHTS
+    gree(kernel->s_d_weights);
+    gree(kernel->v_d_weights);
+    #endif

    free_matrix(input, kernel->rows, input_dim);
    free_matrix(output_cpu, kernel->columns, output_dim);
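Two practical consequences of gating the new fields at the preprocessor level, both visible in the hunks above: enabling or disabling any ADAM_* flag changes the layout of Kernel_cnn and Kernel_nn, so the whole project must be recompiled rather than relinked; and because neuron_io.c neither writes nor reads the moment buffers, saved network files remain byte-compatible whichever flags are set.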