diff --git a/src/cnn/creation.c b/src/cnn/creation.c
index a95344c..3c488f4 100644
--- a/src/cnn/creation.c
+++ b/src/cnn/creation.c
@@ -165,31 +165,67 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act
     cnn->columns = depth_output;
     cnn->weights = (float****)nalloc(depth_input, sizeof(float***));
     cnn->d_weights = (float****)nalloc(depth_input, sizeof(float***));
+    #ifdef ADAM_CNN_WEIGHTS
+    cnn->s_d_weights = (float****)nalloc(depth_input, sizeof(float***));
+    cnn->v_d_weights = (float****)nalloc(depth_input, sizeof(float***));
+    #endif
     for (int i=0; i < depth_input; i++) {
         cnn->weights[i] = (float***)nalloc(depth_output, sizeof(float**));
         cnn->d_weights[i] = (float***)nalloc(depth_output, sizeof(float**));
+        #ifdef ADAM_CNN_WEIGHTS
+        cnn->s_d_weights[i] = (float***)nalloc(depth_output, sizeof(float**));
+        cnn->v_d_weights[i] = (float***)nalloc(depth_output, sizeof(float**));
+        #endif
         for (int j=0; j < depth_output; j++) {
             cnn->weights[i][j] = (float**)nalloc(kernel_size, sizeof(float*));
             cnn->d_weights[i][j] = (float**)nalloc(kernel_size, sizeof(float*));
+            #ifdef ADAM_CNN_WEIGHTS
+            cnn->s_d_weights[i][j] = (float**)nalloc(kernel_size, sizeof(float*));
+            cnn->v_d_weights[i][j] = (float**)nalloc(kernel_size, sizeof(float*));
+            #endif
            for (int k=0; k < kernel_size; k++) {
                cnn->weights[i][j][k] = (float*)nalloc(kernel_size, sizeof(float));
                cnn->d_weights[i][j][k] = (float*)nalloc(kernel_size, sizeof(float));
+                #ifdef ADAM_CNN_WEIGHTS
+                cnn->s_d_weights[i][j][k] = (float*)nalloc(kernel_size, sizeof(float));
+                cnn->v_d_weights[i][j][k] = (float*)nalloc(kernel_size, sizeof(float));
+                #endif
                for (int l=0; l < kernel_size; l++) {
                    cnn->d_weights[i][j][k][l] = 0.;
+                    #ifdef ADAM_CNN_WEIGHTS
+                    cnn->s_d_weights[i][j][k][l] = 0.;
+                    cnn->v_d_weights[i][j][k][l] = 0.;
+                    #endif
                }
            }
        }
    }
    cnn->bias = (float***)nalloc(depth_output, sizeof(float**));
    cnn->d_bias = (float***)nalloc(depth_output, sizeof(float**));
+    #ifdef ADAM_CNN_BIAS
+    cnn->s_d_bias = (float***)nalloc(depth_output, sizeof(float**));
+    cnn->v_d_bias = (float***)nalloc(depth_output, sizeof(float**));
+    #endif
    for (int i=0; i < depth_output; i++) {
        cnn->bias[i] = (float**)nalloc(bias_size, sizeof(float*));
        cnn->d_bias[i] = (float**)nalloc(bias_size, sizeof(float*));
+        #ifdef ADAM_CNN_BIAS
+        cnn->s_d_bias[i] = (float**)nalloc(bias_size, sizeof(float*));
+        cnn->v_d_bias[i] = (float**)nalloc(bias_size, sizeof(float*));
+        #endif
        for (int j=0; j < bias_size; j++) {
            cnn->bias[i][j] = (float*)nalloc(bias_size, sizeof(float));
            cnn->d_bias[i][j] = (float*)nalloc(bias_size, sizeof(float));
+            #ifdef ADAM_CNN_BIAS
+            cnn->s_d_bias[i][j] = (float*)nalloc(bias_size, sizeof(float));
+            cnn->v_d_bias[i][j] = (float*)nalloc(bias_size, sizeof(float));
+            #endif
            for (int k=0; k < bias_size; k++) {
                cnn->d_bias[i][j][k] = 0.;
+                #ifdef ADAM_CNN_BIAS
+                cnn->s_d_bias[i][j][k] = 0.;
+                cnn->v_d_bias[i][j][k] = 0.;
+                #endif
            }
        }
    }
@@ -220,17 +256,37 @@ void add_dense(Network* network, int size_output, int activation) {
    nn->size_output = size_output;
    nn->bias = (float*)nalloc(size_output, sizeof(float));
    nn->d_bias = (float*)nalloc(size_output, sizeof(float));
+    #ifdef ADAM_DENSE_BIAS
+    nn->s_d_bias = (float*)nalloc(size_output, sizeof(float));
+    nn->v_d_bias = (float*)nalloc(size_output, sizeof(float));
+    #endif
    for (int i=0; i < size_output; i++) {
        nn->d_bias[i] = 0.;
+        #ifdef ADAM_DENSE_BIAS
+        nn->s_d_bias[i] = 0.;
+        nn->v_d_bias[i] = 0.;
+        #endif
    }

    nn->weights = (float**)nalloc(size_input, sizeof(float*));
    nn->d_weights = (float**)nalloc(size_input, sizeof(float*));
+    #ifdef ADAM_DENSE_WEIGHTS
+    nn->s_d_weights = (float**)nalloc(size_input, sizeof(float*));
+    nn->v_d_weights = (float**)nalloc(size_input, sizeof(float*));
+    #endif
    for (int i=0; i < size_input; i++) {
        nn->weights[i] = (float*)nalloc(size_output, sizeof(float));
        nn->d_weights[i] = (float*)nalloc(size_output, sizeof(float));
+        #ifdef ADAM_DENSE_WEIGHTS
+        nn->s_d_weights[i] = (float*)nalloc(size_output, sizeof(float));
+        nn->v_d_weights[i] = (float*)nalloc(size_output, sizeof(float));
+        #endif
        for (int j=0; j < size_output; j++) {
            nn->d_weights[i][j] = 0.;
+            #ifdef ADAM_DENSE_WEIGHTS
+            nn->s_d_weights[i][j] = 0.;
+            nn->v_d_weights[i][j] = 0.;
+            #endif
        }
    }

@@ -262,16 +318,36 @@ void add_dense_linearisation(Network* network, int size_output, int activation)
    nn->bias = (float*)nalloc(size_output, sizeof(float));
    nn->d_bias = (float*)nalloc(size_output, sizeof(float));
+    #ifdef ADAM_DENSE_BIAS
+    nn->s_d_bias = (float*)nalloc(size_output, sizeof(float));
+    nn->v_d_bias = (float*)nalloc(size_output, sizeof(float));
+    #endif
    for (int i=0; i < size_output; i++) {
        nn->d_bias[i] = 0.;
+        #ifdef ADAM_DENSE_BIAS
+        nn->s_d_bias[i] = 0.;
+        nn->v_d_bias[i] = 0.;
+        #endif
    }

    nn->weights = (float**)nalloc(size_input, sizeof(float*));
    nn->d_weights = (float**)nalloc(size_input, sizeof(float*));
+    #ifdef ADAM_DENSE_WEIGHTS
+    nn->s_d_weights = (float**)nalloc(size_input, sizeof(float*));
+    nn->v_d_weights = (float**)nalloc(size_input, sizeof(float*));
+    #endif
    for (int i=0; i < size_input; i++) {
        nn->weights[i] = (float*)nalloc(size_output, sizeof(float));
        nn->d_weights[i] = (float*)nalloc(size_output, sizeof(float));
+        #ifdef ADAM_DENSE_WEIGHTS
+        nn->s_d_weights[i] = (float*)nalloc(size_output, sizeof(float));
+        nn->v_d_weights[i] = (float*)nalloc(size_output, sizeof(float));
+        #endif
        for (int j=0; j < size_output; j++) {
            nn->d_weights[i][j] = 0.;
+            #ifdef ADAM_DENSE_WEIGHTS
+            nn->s_d_weights[i][j] = 0.;
+            nn->v_d_weights[i][j] = 0.;
+            #endif
        }
    }
    initialisation_1d_matrix(network->initialisation, nn->bias, size_output, size_input, size_output);
diff --git a/src/cnn/free.c b/src/cnn/free.c
index b9a4943..539b78e 100644
--- a/src/cnn/free.c
+++ b/src/cnn/free.c
@@ -42,27 +42,55 @@ void free_convolution(Network* network, int pos) {
        for (int j=0; j < bias_size; j++) {
            gree(k_pos->bias[i][j]);
            gree(k_pos->d_bias[i][j]);
+            #ifdef ADAM_CNN_BIAS
+            gree(k_pos->s_d_bias[i][j]);
+            gree(k_pos->v_d_bias[i][j]);
+            #endif
        }
        gree(k_pos->bias[i]);
        gree(k_pos->d_bias[i]);
+        #ifdef ADAM_CNN_BIAS
+        gree(k_pos->s_d_bias[i]);
+        gree(k_pos->v_d_bias[i]);
+        #endif
    }
    gree(k_pos->bias);
    gree(k_pos->d_bias);
+    #ifdef ADAM_CNN_BIAS
+    gree(k_pos->s_d_bias);
+    gree(k_pos->v_d_bias);
+    #endif

    for (int i=0; i < r; i++) {
        for (int j=0; j < c; j++) {
            for (int k=0; k < k_size; k++) {
                gree(k_pos->weights[i][j][k]);
                gree(k_pos->d_weights[i][j][k]);
+                #ifdef ADAM_CNN_WEIGHTS
+                gree(k_pos->s_d_weights[i][j][k]);
+                gree(k_pos->v_d_weights[i][j][k]);
+                #endif
            }
            gree(k_pos->weights[i][j]);
            gree(k_pos->d_weights[i][j]);
+            #ifdef ADAM_CNN_WEIGHTS
+            gree(k_pos->s_d_weights[i][j]);
+            gree(k_pos->v_d_weights[i][j]);
+            #endif
        }
        gree(k_pos->weights[i]);
        gree(k_pos->d_weights[i]);
+        #ifdef ADAM_CNN_WEIGHTS
+        gree(k_pos->s_d_weights[i]);
+        gree(k_pos->v_d_weights[i]);
+        #endif
    }
    gree(k_pos->weights);
    gree(k_pos->d_weights);
+    #ifdef ADAM_CNN_WEIGHTS
+    gree(k_pos->s_d_weights);
+    gree(k_pos->v_d_weights);
+    #endif

    gree(k_pos);
}
@@ -74,12 +102,24 @@ void free_dense(Network* network, int pos) {
    for (int i=0; i < dim; i++) {
        gree(k_pos->weights[i]);
        gree(k_pos->d_weights[i]);
+        #ifdef ADAM_DENSE_WEIGHTS
+        gree(k_pos->s_d_weights[i]);
+        gree(k_pos->v_d_weights[i]);
+        #endif
    }
    gree(k_pos->weights);
    gree(k_pos->d_weights);
+    #ifdef ADAM_DENSE_WEIGHTS
+    gree(k_pos->s_d_weights);
+    gree(k_pos->v_d_weights);
+    #endif

    gree(k_pos->bias);
    gree(k_pos->d_bias);
+    #ifdef ADAM_DENSE_BIAS
+    gree(k_pos->s_d_bias);
+    gree(k_pos->v_d_bias);
+    #endif

    gree(k_pos);
}
@@ -91,12 +131,24 @@ void free_dense_linearisation(Network* network, int pos) {
    for (int i=0; i < dim; i++) {
        gree(k_pos->weights[i]);
        gree(k_pos->d_weights[i]);
+        #ifdef ADAM_DENSE_WEIGHTS
+        gree(k_pos->s_d_weights[i]);
+        gree(k_pos->v_d_weights[i]);
+        #endif
    }
    gree(k_pos->weights);
    gree(k_pos->d_weights);
+    #ifdef ADAM_DENSE_WEIGHTS
+    gree(k_pos->s_d_weights);
+    gree(k_pos->v_d_weights);
+    #endif

    gree(k_pos->bias);
    gree(k_pos->d_bias);
+    #ifdef ADAM_DENSE_BIAS
+    gree(k_pos->s_d_bias);
+    gree(k_pos->v_d_bias);
+    #endif

    gree(k_pos);
}
diff --git a/src/cnn/include/config.h b/src/cnn/include/config.h
index 92daf00..6c4535e 100644
--- a/src/cnn/include/config.h
+++ b/src/cnn/include/config.h
@@ -1,12 +1,27 @@
 #ifndef DEF_CONFIG_H
 #define DEF_CONFIG_H
+
 //* Training parameters
 #define EPOCHS 10           // Default number of epochs (iterations over all the images)
 #define BATCHES 32          // Number of images to process before updating the network
 #define LEARNING_RATE 3e-4  // Learning rate
 #define USE_MULTITHREADING  // Comment out to use a single core during training (better for batches that are processed quickly)

+//* Adam optimizer parameters
+#define ALPHA 3e-4
+#define BETA_1 0.9
+#define BETA_2 0.999
+#define EPSILON 1e-7
+
+//* Adam optimizer options
+//* Enable or disable Adam on the dense layers
+//#define ADAM_DENSE_WEIGHTS
+//#define ADAM_DENSE_BIAS
+//* Enable or disable Adam on the convolutional layers
+//#define ADAM_CNN_WEIGHTS
+//#define ADAM_CNN_BIAS
+
 //* Optimization parameter for a JPEG dataset
 // keep images in RAM instead of re-reading and decompressing them each time
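For reference, with ALPHA = α, BETA_1 = β₁, BETA_2 = β₂ and EPSILON = ε, the per-parameter step these constants feed (see src/cnn/update.c below) is

    v_t = \beta_1 v_{t-1} + (1 - \beta_1)\, g_t
    s_t = \beta_2 s_{t-1} + (1 - \beta_2)\, g_t^2
    \theta_t = \theta_{t-1} - \alpha\, v_t / \sqrt{s_t + \epsilon}

where g_t is the gradient accumulated for that parameter over a batch. Two departures from the reference Adam of Kingma & Ba are worth flagging: the bias-correction factors 1/(1 - β₁^t) and 1/(1 - β₂^t) are omitted, which mainly changes the size of the early updates (on the order of 1/(1 - β₂) = 1000 steps, while the moments are still near their zero initialisation), and ε is added under the square root rather than to its result.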
diff --git a/src/cnn/include/struct.h b/src/cnn/include/struct.h
index 90df7eb..1f04c43 100644
--- a/src/cnn/include/struct.h
+++ b/src/cnn/include/struct.h
@@ -1,6 +1,8 @@
 #ifndef DEF_STRUCT_H
 #define DEF_STRUCT_H
+#include "config.h"
+
 #define NO_POOLING 0
 #define AVG_POOLING 1
 #define MAX_POOLING 2
@@ -15,8 +17,16 @@ typedef struct Kernel_cnn {
     int columns; // Depth of the output
     float*** bias; // bias[columns][dim_output][dim_output]
     float*** d_bias; // d_bias[columns][dim_output][dim_output]
+    #ifdef ADAM_CNN_BIAS
+    float*** s_d_bias; // s_d_bias[columns][dim_output][dim_output]
+    float*** v_d_bias; // v_d_bias[columns][dim_output][dim_output]
+    #endif
     float**** weights; // weights[rows][columns][k_size][k_size]
     float**** d_weights; // d_weights[rows][columns][k_size][k_size]
+    #ifdef ADAM_CNN_WEIGHTS
+    float**** s_d_weights; // s_d_weights[rows][columns][k_size][k_size]
+    float**** v_d_weights; // v_d_weights[rows][columns][k_size][k_size]
+    #endif
 } Kernel_cnn;

 typedef struct Kernel_nn {
@@ -25,8 +35,16 @@ typedef struct Kernel_nn {
     int size_output; // Number of output elements
     float* bias; // bias[size_output]
     float* d_bias; // d_bias[size_output]
+    #ifdef ADAM_DENSE_BIAS
+    float* s_d_bias; // s_d_bias[size_output]
+    float* v_d_bias; // v_d_bias[size_output]
+    #endif
     float** weights; // weights[size_input][size_output]
     float** d_weights; // d_weights[size_input][size_output]
+    #ifdef ADAM_DENSE_WEIGHTS
+    float** s_d_weights; // s_d_weights[size_input][size_output]
+    float** v_d_weights; // v_d_weights[size_input][size_output]
+    #endif
 } Kernel_nn;

 typedef struct Kernel {
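Making struct.h include config.h is load-bearing: the s_/v_ fields are gated on preprocessor flags, so every translation unit that touches Kernel_cnn or Kernel_nn must see exactly the same ADAM_* definitions, or the object files will disagree on struct sizes and field offsets. A minimal sketch of the hazard, with a hypothetical file name:

    /* a.c (hypothetical): if this file were compiled with -DADAM_DENSE_BIAS
     * while another .c file including struct.h were not, the two would
     * compute different values for sizeof(Kernel_nn) and different offsets
     * for `weights`, and any Kernel_nn* passed between them would be
     * misread. Including config.h from struct.h keeps the flags identical
     * everywhere the types are used. */
    #include <stddef.h>
    #include "include/struct.h"

    size_t kernel_nn_size(void) {
        return sizeof(Kernel_nn); // must be the same value in every object file
    }

It also means that toggling any ADAM_* flag requires rebuilding the whole project, not just update.c.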
diff --git a/src/cnn/neuron_io.c b/src/cnn/neuron_io.c
index c29fdc1..a009f59 100644
--- a/src/cnn/neuron_io.c
+++ b/src/cnn/neuron_io.c
@@ -249,35 +249,71 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) {
        cnn->bias = (float***)nalloc(cnn->columns, sizeof(float**));
        cnn->d_bias = (float***)nalloc(cnn->columns, sizeof(float**));
+        #ifdef ADAM_CNN_BIAS
+        cnn->s_d_bias = (float***)nalloc(cnn->columns, sizeof(float**));
+        cnn->v_d_bias = (float***)nalloc(cnn->columns, sizeof(float**));
+        #endif
        for (int i=0; i < cnn->columns; i++) {
            cnn->bias[i] = (float**)nalloc(output_dim, sizeof(float*));
            cnn->d_bias[i] = (float**)nalloc(output_dim, sizeof(float*));
+            #ifdef ADAM_CNN_BIAS
+            cnn->s_d_bias[i] = (float**)nalloc(output_dim, sizeof(float*));
+            cnn->v_d_bias[i] = (float**)nalloc(output_dim, sizeof(float*));
+            #endif
            for (int j=0; j < output_dim; j++) {
                cnn->bias[i][j] = (float*)nalloc(output_dim, sizeof(float));
                cnn->d_bias[i][j] = (float*)nalloc(output_dim, sizeof(float));
+                #ifdef ADAM_CNN_BIAS
+                cnn->s_d_bias[i][j] = (float*)nalloc(output_dim, sizeof(float));
+                cnn->v_d_bias[i][j] = (float*)nalloc(output_dim, sizeof(float));
+                #endif
                for (int k=0; k < output_dim; k++) {
                    (void) !fread(&tmp, sizeof(tmp), 1, ptr);
                    cnn->bias[i][j][k] = tmp;
                    cnn->d_bias[i][j][k] = 0.;
+                    #ifdef ADAM_CNN_BIAS
+                    cnn->s_d_bias[i][j][k] = 0.;
+                    cnn->v_d_bias[i][j][k] = 0.;
+                    #endif
                }
            }
        }

        cnn->weights = (float****)nalloc(cnn->rows, sizeof(float***));
        cnn->d_weights = (float****)nalloc(cnn->rows, sizeof(float***));
+        #ifdef ADAM_CNN_WEIGHTS
+        cnn->s_d_weights = (float****)nalloc(cnn->rows, sizeof(float***));
+        cnn->v_d_weights = (float****)nalloc(cnn->rows, sizeof(float***));
+        #endif
        for (int i=0; i < cnn->rows; i++) {
            cnn->weights[i] = (float***)nalloc(cnn->columns, sizeof(float**));
            cnn->d_weights[i] = (float***)nalloc(cnn->columns, sizeof(float**));
+            #ifdef ADAM_CNN_WEIGHTS
+            cnn->s_d_weights[i] = (float***)nalloc(cnn->columns, sizeof(float**));
+            cnn->v_d_weights[i] = (float***)nalloc(cnn->columns, sizeof(float**));
+            #endif
            for (int j=0; j < cnn->columns; j++) {
                cnn->weights[i][j] = (float**)nalloc(cnn->k_size, sizeof(float*));
                cnn->d_weights[i][j] = (float**)nalloc(cnn->k_size, sizeof(float*));
+                #ifdef ADAM_CNN_WEIGHTS
+                cnn->s_d_weights[i][j] = (float**)nalloc(cnn->k_size, sizeof(float*));
+                cnn->v_d_weights[i][j] = (float**)nalloc(cnn->k_size, sizeof(float*));
+                #endif
                for (int k=0; k < cnn->k_size; k++) {
                    cnn->weights[i][j][k] = (float*)nalloc(cnn->k_size, sizeof(float));
                    cnn->d_weights[i][j][k] = (float*)nalloc(cnn->k_size, sizeof(float));
+                    #ifdef ADAM_CNN_WEIGHTS
+                    cnn->s_d_weights[i][j][k] = (float*)nalloc(cnn->k_size, sizeof(float));
+                    cnn->v_d_weights[i][j][k] = (float*)nalloc(cnn->k_size, sizeof(float));
+                    #endif
                    for (int l=0; l < cnn->k_size; l++) {
                        (void) !fread(&tmp, sizeof(tmp), 1, ptr);
                        cnn->weights[i][j][k][l] = tmp;
                        cnn->d_weights[i][j][k][l] = 0.;
+                        #ifdef ADAM_CNN_WEIGHTS
+                        cnn->s_d_weights[i][j][k][l] = 0.;
+                        cnn->v_d_weights[i][j][k][l] = 0.;
+                        #endif
                    }
                }
            }
@@ -300,21 +336,41 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) {

        nn->bias = (float*)nalloc(nn->size_output, sizeof(float));
        nn->d_bias = (float*)nalloc(nn->size_output, sizeof(float));
+        #ifdef ADAM_DENSE_BIAS
+        nn->s_d_bias = (float*)nalloc(nn->size_output, sizeof(float));
+        nn->v_d_bias = (float*)nalloc(nn->size_output, sizeof(float));
+        #endif
        for (int i=0; i < nn->size_output; i++) {
            (void) !fread(&tmp, sizeof(tmp), 1, ptr);
            nn->bias[i] = tmp;
            nn->d_bias[i] = 0.;
+            #ifdef ADAM_DENSE_BIAS
+            nn->s_d_bias[i] = 0.;
+            nn->v_d_bias[i] = 0.;
+            #endif
        }

        nn->weights = (float**)nalloc(nn->size_input, sizeof(float*));
        nn->d_weights = (float**)nalloc(nn->size_input, sizeof(float*));
+        #ifdef ADAM_DENSE_WEIGHTS
+        nn->s_d_weights = (float**)nalloc(nn->size_input, sizeof(float*));
+        nn->v_d_weights = (float**)nalloc(nn->size_input, sizeof(float*));
+        #endif
        for (int i=0; i < nn->size_input; i++) {
            nn->weights[i] = (float*)nalloc(nn->size_output, sizeof(float));
            nn->d_weights[i] = (float*)nalloc(nn->size_output, sizeof(float));
+            #ifdef ADAM_DENSE_WEIGHTS
+            nn->s_d_weights[i] = (float*)nalloc(nn->size_output, sizeof(float));
+            nn->v_d_weights[i] = (float*)nalloc(nn->size_output, sizeof(float));
+            #endif
            for (int j=0; j < nn->size_output; j++) {
                (void) !fread(&tmp, sizeof(tmp), 1, ptr);
                nn->weights[i][j] = tmp;
                nn->d_weights[i][j] = 0.;
+                #ifdef ADAM_DENSE_WEIGHTS
+                nn->s_d_weights[i][j] = 0.;
+                nn->v_d_weights[i][j] = 0.;
+                #endif
            }
        }
    } else if (type_couche == POOLING) { // Pooling layer case
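Note that read_kernel zeroes the moment buffers instead of reading them: the serialised format stores only weights and biases, so a reloaded network restarts Adam from fresh accumulators. If preserving optimizer state across restarts ever mattered, a hypothetical extension (not part of this patch; it would need matching fwrite calls on the save path and a format version bump) could reuse the fread idiom the file already uses for the weights:

    /* Hypothetical: restore dense-layer moments right after the weights. */
    #ifdef ADAM_DENSE_WEIGHTS
    for (int i=0; i < nn->size_input; i++) {
        for (int j=0; j < nn->size_output; j++) {
            (void) !fread(&tmp, sizeof(tmp), 1, ptr);
            nn->s_d_weights[i][j] = tmp;
            (void) !fread(&tmp, sizeof(tmp), 1, ptr);
            nn->v_d_weights[i][j] = tmp;
        }
    }
    #endif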
diff --git a/src/cnn/update.c b/src/cnn/update.c
index 5a1f572..f8fa448 100644
--- a/src/cnn/update.c
+++ b/src/cnn/update.c
@@ -1,8 +1,12 @@
 #include <stdio.h>
+#include <math.h>
+#include <float.h>

 #include "include/update.h"
 #include "include/struct.h"
+#include "include/config.h"
+

 float clip(float a) {
     if (a > NETWORK_CLIP_VALUE) {
         return NETWORK_CLIP_VALUE;
@@ -34,7 +38,13 @@ void update_weights(Network* network, Network* d_network) {
            for (int b=0; b < output_depth; b++) {
                for (int c=0; c < k_size; c++) {
                    for (int d=0; d < k_size; d++) {
+                        #ifdef ADAM_CNN_WEIGHTS
+                        d_cnn->v_d_weights[a][b][c][d] = BETA_1*d_cnn->v_d_weights[a][b][c][d] + (1-BETA_1)*d_cnn->d_weights[a][b][c][d];
+                        d_cnn->s_d_weights[a][b][c][d] = BETA_2*d_cnn->s_d_weights[a][b][c][d] + (1-BETA_2)*d_cnn->d_weights[a][b][c][d]*d_cnn->d_weights[a][b][c][d];
+                        cnn->weights[a][b][c][d] -= ALPHA*(d_cnn->v_d_weights[a][b][c][d]/sqrt(d_cnn->s_d_weights[a][b][c][d]+EPSILON));
+                        #else
                        cnn->weights[a][b][c][d] -= network->learning_rate * d_cnn->d_weights[a][b][c][d];
+                        #endif
                        d_cnn->d_weights[a][b][c][d] = 0;

                        cnn->weights[a][b][c][d] = clip(cnn->weights[a][b][c][d]);
@@ -49,7 +59,13 @@ void update_weights(Network* network, Network* d_network) {

        for (int a=0; a < input_width; a++) {
            for (int b=0; b < output_width; b++) {
+                #ifdef ADAM_DENSE_WEIGHTS
+                d_nn->v_d_weights[a][b] = BETA_1*d_nn->v_d_weights[a][b] + (1-BETA_1)*d_nn->d_weights[a][b];
+                d_nn->s_d_weights[a][b] = BETA_2*d_nn->s_d_weights[a][b] + (1-BETA_2)*d_nn->d_weights[a][b]*d_nn->d_weights[a][b];
+                nn->weights[a][b] -= ALPHA*(d_nn->v_d_weights[a][b]/sqrt(d_nn->s_d_weights[a][b]+EPSILON));
+                #else
                nn->weights[a][b] -= network->learning_rate * d_nn->d_weights[a][b];
+                #endif
                d_nn->d_weights[a][b] = 0;
            }
        }
@@ -61,7 +77,13 @@ void update_weights(Network* network, Network* d_network) {

            for (int a=0; a < size_input; a++) {
                for (int b=0; b < output_width; b++) {
+                    #ifdef ADAM_DENSE_WEIGHTS
+                    d_nn->v_d_weights[a][b] = BETA_1*d_nn->v_d_weights[a][b] + (1-BETA_1)*d_nn->d_weights[a][b];
+                    d_nn->s_d_weights[a][b] = BETA_2*d_nn->s_d_weights[a][b] + (1-BETA_2)*d_nn->d_weights[a][b]*d_nn->d_weights[a][b];
+                    nn->weights[a][b] -= ALPHA*(d_nn->v_d_weights[a][b]/sqrt(d_nn->s_d_weights[a][b]+EPSILON));
+                    #else
                    nn->weights[a][b] -= network->learning_rate * d_nn->d_weights[a][b];
+                    #endif
                    d_nn->d_weights[a][b] = 0;

                    nn->weights[a][b] = clip(nn->weights[a][b]);
@@ -89,7 +111,13 @@ void update_bias(Network* network, Network* d_network) {
        for (int a=0; a < output_depth; a++) {
            for (int b=0; b < output_width; b++) {
                for (int c=0; c < output_width; c++) {
+                    #ifdef ADAM_CNN_BIAS
+                    d_cnn->v_d_bias[a][b][c] = BETA_1*d_cnn->v_d_bias[a][b][c] + (1-BETA_1)*d_cnn->d_bias[a][b][c];
+                    d_cnn->s_d_bias[a][b][c] = BETA_2*d_cnn->s_d_bias[a][b][c] + (1-BETA_2)*d_cnn->d_bias[a][b][c]*d_cnn->d_bias[a][b][c];
+                    cnn->bias[a][b][c] -= ALPHA*(d_cnn->v_d_bias[a][b][c]/sqrt(d_cnn->s_d_bias[a][b][c]+EPSILON));
+                    #else
                    cnn->bias[a][b][c] -= network->learning_rate * d_cnn->d_bias[a][b][c];
+                    #endif
                    d_cnn->d_bias[a][b][c] = 0;

                    cnn->bias[a][b][c] = clip(cnn->bias[a][b][c]);
@@ -101,7 +129,13 @@ void update_bias(Network* network, Network* d_network) {
            Kernel_nn* d_nn = dk_i->nn;

            for (int a=0; a < output_width; a++) {
+                #ifdef ADAM_DENSE_BIAS
+                d_nn->v_d_bias[a] = BETA_1*d_nn->v_d_bias[a] + (1-BETA_1)*d_nn->d_bias[a];
+                d_nn->s_d_bias[a] = BETA_2*d_nn->s_d_bias[a] + (1-BETA_2)*d_nn->d_bias[a]*d_nn->d_bias[a];
+                nn->bias[a] -= ALPHA*(d_nn->v_d_bias[a]/sqrt(d_nn->s_d_bias[a]+EPSILON));
+                #else
                nn->bias[a] -= network->learning_rate * d_nn->d_bias[a];
+                #endif
                d_nn->d_bias[a] = 0;

                nn->bias[a] = clip(nn->bias[a]);
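Condensed to a single parameter, the step applied above under the ADAM_* flags is the following; a minimal self-contained sketch with the constants inlined from config.h:

    #include <math.h>

    #define ALPHA   3e-4
    #define BETA_1  0.9
    #define BETA_2  0.999
    #define EPSILON 1e-7

    /* One Adam step for a parameter w with batch-accumulated gradient g;
     * v and s are its moment accumulators, zero-initialised at creation. */
    static float adam_step(float w, float g, float *v, float *s) {
        *v = BETA_1 * (*v) + (1 - BETA_1) * g;     // first moment (momentum)
        *s = BETA_2 * (*s) + (1 - BETA_2) * g * g; // second moment (RMS)
        return w - ALPHA * (*v) / sqrtf(*s + EPSILON);
    }

In update.c the accumulated gradient is then reset to 0 for the next batch, and the new value is passed through clip() wherever the plain-SGD branch already did so.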
diff --git a/src/cnn/utils.c b/src/cnn/utils.c
index 1a211ba..8de8b51 100644
--- a/src/cnn/utils.c
+++ b/src/cnn/utils.c
@@ -148,19 +148,39 @@ Network* copy_network(Network* network) {
            network_cp->kernel[i]->nn->bias = (float*)nalloc(size_output, sizeof(float));
            network_cp->kernel[i]->nn->d_bias = (float*)nalloc(size_output, sizeof(float));
+            #ifdef ADAM_DENSE_BIAS
+            network_cp->kernel[i]->nn->s_d_bias = (float*)nalloc(size_output, sizeof(float));
+            network_cp->kernel[i]->nn->v_d_bias = (float*)nalloc(size_output, sizeof(float));
+            #endif

            for (int j=0; j < size_output; j++) {
                copyVar(kernel[i]->nn->bias[j]);
                network_cp->kernel[i]->nn->d_bias[j] = 0.;
+                #ifdef ADAM_DENSE_BIAS
+                network_cp->kernel[i]->nn->s_d_bias[j] = 0.;
+                network_cp->kernel[i]->nn->v_d_bias[j] = 0.;
+                #endif
            }

            network_cp->kernel[i]->nn->weights = (float**)nalloc(size_input, sizeof(float*));
            network_cp->kernel[i]->nn->d_weights = (float**)nalloc(size_input, sizeof(float*));
+            #ifdef ADAM_DENSE_WEIGHTS
+            network_cp->kernel[i]->nn->s_d_weights = (float**)nalloc(size_input, sizeof(float*));
+            network_cp->kernel[i]->nn->v_d_weights = (float**)nalloc(size_input, sizeof(float*));
+            #endif
            for (int j=0; j < size_input; j++) {
                network_cp->kernel[i]->nn->weights[j] = (float*)nalloc(size_output, sizeof(float));
                network_cp->kernel[i]->nn->d_weights[j] = (float*)nalloc(size_output, sizeof(float));
+                #ifdef ADAM_DENSE_WEIGHTS
+                network_cp->kernel[i]->nn->s_d_weights[j] = (float*)nalloc(size_output, sizeof(float));
+                network_cp->kernel[i]->nn->v_d_weights[j] = (float*)nalloc(size_output, sizeof(float));
+                #endif
                for (int k=0; k < size_output; k++) {
                    copyVar(kernel[i]->nn->weights[j][k]);
                    network_cp->kernel[i]->nn->d_weights[j][k] = 0.;
+                    #ifdef ADAM_DENSE_WEIGHTS
+                    network_cp->kernel[i]->nn->s_d_weights[j][k] = 0.;
+                    network_cp->kernel[i]->nn->v_d_weights[j][k] = 0.;
+                    #endif
                }
            }
        }
@@ -184,33 +204,69 @@ Network* copy_network(Network* network) {
            network_cp->kernel[i]->cnn->bias = (float***)nalloc(columns, sizeof(float**));
            network_cp->kernel[i]->cnn->d_bias = (float***)nalloc(columns, sizeof(float**));
+            #ifdef ADAM_CNN_BIAS
+            network_cp->kernel[i]->cnn->s_d_bias = (float***)nalloc(columns, sizeof(float**));
+            network_cp->kernel[i]->cnn->v_d_bias = (float***)nalloc(columns, sizeof(float**));
+            #endif
            for (int j=0; j < columns; j++) {
                network_cp->kernel[i]->cnn->bias[j] = (float**)nalloc(output_dim, sizeof(float*));
                network_cp->kernel[i]->cnn->d_bias[j] = (float**)nalloc(output_dim, sizeof(float*));
+                #ifdef ADAM_CNN_BIAS
+                network_cp->kernel[i]->cnn->s_d_bias[j] = (float**)nalloc(output_dim, sizeof(float*));
+                network_cp->kernel[i]->cnn->v_d_bias[j] = (float**)nalloc(output_dim, sizeof(float*));
+                #endif
                for (int k=0; k < output_dim; k++) {
                    network_cp->kernel[i]->cnn->bias[j][k] = (float*)nalloc(output_dim, sizeof(float));
                    network_cp->kernel[i]->cnn->d_bias[j][k] = (float*)nalloc(output_dim, sizeof(float));
+                    #ifdef ADAM_CNN_BIAS
+                    network_cp->kernel[i]->cnn->s_d_bias[j][k] = (float*)nalloc(output_dim, sizeof(float));
+                    network_cp->kernel[i]->cnn->v_d_bias[j][k] = (float*)nalloc(output_dim, sizeof(float));
+                    #endif
                    for (int l=0; l < output_dim; l++) {
                        copyVar(kernel[i]->cnn->bias[j][k][l]);
                        network_cp->kernel[i]->cnn->d_bias[j][k][l] = 0.;
+                        #ifdef ADAM_CNN_BIAS
+                        network_cp->kernel[i]->cnn->s_d_bias[j][k][l] = 0.;
+                        network_cp->kernel[i]->cnn->v_d_bias[j][k][l] = 0.;
+                        #endif
                    }
                }
            }

            network_cp->kernel[i]->cnn->weights = (float****)nalloc(rows, sizeof(float***));
            network_cp->kernel[i]->cnn->d_weights = (float****)nalloc(rows, sizeof(float***));
+            #ifdef ADAM_CNN_WEIGHTS
+            network_cp->kernel[i]->cnn->s_d_weights = (float****)nalloc(rows, sizeof(float***));
+            network_cp->kernel[i]->cnn->v_d_weights = (float****)nalloc(rows, sizeof(float***));
+            #endif
            for (int j=0; j < rows; j++) {
                network_cp->kernel[i]->cnn->weights[j] = (float***)nalloc(columns, sizeof(float**));
                network_cp->kernel[i]->cnn->d_weights[j] = (float***)nalloc(columns, sizeof(float**));
+                #ifdef ADAM_CNN_WEIGHTS
+                network_cp->kernel[i]->cnn->s_d_weights[j] = (float***)nalloc(columns, sizeof(float**));
+                network_cp->kernel[i]->cnn->v_d_weights[j] = (float***)nalloc(columns, sizeof(float**));
+                #endif
                for (int k=0; k < columns; k++) {
                    network_cp->kernel[i]->cnn->weights[j][k] = (float**)nalloc(k_size, sizeof(float*));
                    network_cp->kernel[i]->cnn->d_weights[j][k] = (float**)nalloc(k_size, sizeof(float*));
+                    #ifdef ADAM_CNN_WEIGHTS
+                    network_cp->kernel[i]->cnn->s_d_weights[j][k] = (float**)nalloc(k_size, sizeof(float*));
+                    network_cp->kernel[i]->cnn->v_d_weights[j][k] = (float**)nalloc(k_size, sizeof(float*));
+                    #endif
                    for (int l=0; l < k_size; l++) {
                        network_cp->kernel[i]->cnn->weights[j][k][l] = (float*)nalloc(k_size, sizeof(float));
                        network_cp->kernel[i]->cnn->d_weights[j][k][l] = (float*)nalloc(k_size, sizeof(float));
+                        #ifdef ADAM_CNN_WEIGHTS
+                        network_cp->kernel[i]->cnn->s_d_weights[j][k][l] = (float*)nalloc(k_size, sizeof(float));
+                        network_cp->kernel[i]->cnn->v_d_weights[j][k][l] = (float*)nalloc(k_size, sizeof(float));
+                        #endif
                        for (int m=0; m < k_size; m++) {
                            copyVar(kernel[i]->cnn->weights[j][k][l][m]);
                            network_cp->kernel[i]->cnn->d_weights[j][k][l][m] = 0.;
+                            #ifdef ADAM_CNN_WEIGHTS
+                            network_cp->kernel[i]->cnn->s_d_weights[j][k][l][m] = 0.;
+                            network_cp->kernel[i]->cnn->v_d_weights[j][k][l][m] = 0.;
+                            #endif
                        }
                    }
                }
diff --git a/src/scripts/convolution_benchmark.cu b/src/scripts/convolution_benchmark.cu
index 11b5b03..4650cad 100644
--- a/src/scripts/convolution_benchmark.cu
+++ b/src/scripts/convolution_benchmark.cu
@@ -1,3 +1,5 @@
+//! This file uses an old implementation of convolution based on linearised matrices.
+//! It is therefore no longer compatible, and no longer compiles.
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>
@@ -114,17 +116,33 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
    // bias[kernel->columns]
    kernel->bias = (float*)malloc(kernel->columns * sizeof(float));
    kernel->d_bias = (float*)malloc(kernel->columns * sizeof(float));
+    #ifdef ADAM_CNN_BIAS
+    kernel->s_d_bias = (float*)malloc(kernel->columns * sizeof(float));
+    kernel->v_d_bias = (float*)malloc(kernel->columns * sizeof(float));
+    #endif
    for (int i=0; i < kernel->columns; i++) {
        kernel->bias[i] = random_float(0.0f, 15.0f);
        kernel->d_bias[i] = random_float(0.0f, 1.5f);
+        #ifdef ADAM_CNN_BIAS
+        kernel->s_d_bias[i] = random_float(0.0f, 1.5f);
+        kernel->v_d_bias[i] = random_float(0.0f, 1.5f);
+        #endif
    }

    // weights[rows][columns][k_size][k_size]
    kernel->weights = (float****)malloc(sizeof(float***)*kernel->rows);
    kernel->d_weights = (float****)malloc(sizeof(float***)*kernel->rows);
+    #ifdef ADAM_CNN_WEIGHTS
+    kernel->s_d_weights = (float****)malloc(sizeof(float***)*kernel->rows);
+    kernel->v_d_weights = (float****)malloc(sizeof(float***)*kernel->rows);
+    #endif
    for (int i=0; i < kernel->rows; i++) {
        kernel->weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 15.0f);
        kernel->d_weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
+        #ifdef ADAM_CNN_WEIGHTS
+        kernel->s_d_weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
+        kernel->v_d_weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
+        #endif
    }

    float*** input = create_matrix(kernel->rows, input_dim, input_dim, 5.0f);
@@ -162,13 +180,25 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)

    free(kernel->bias);
    free(kernel->d_bias);
+    #ifdef ADAM_CNN_BIAS
+    free(kernel->s_d_bias);
+    free(kernel->v_d_bias);
+    #endif

    for (int i=0; i < kernel->rows; i++) {
        free_matrix(kernel->weights[i], kernel->columns, kernel->k_size);
        free_matrix(kernel->d_weights[i], kernel->columns, kernel->k_size);
+        #ifdef ADAM_CNN_WEIGHTS
+        free_matrix(kernel->s_d_weights[i], kernel->columns, kernel->k_size);
+        free_matrix(kernel->v_d_weights[i], kernel->columns, kernel->k_size);
+        #endif
    }
    free(kernel->weights);
    free(kernel->d_weights);
+    #ifdef ADAM_CNN_WEIGHTS
+    free(kernel->s_d_weights);
+    free(kernel->v_d_weights);
+    #endif

    free_matrix(input, kernel->rows, input_dim);
    free_matrix(output_cpu, kernel->columns, output_dim);
diff --git a/test/cnn_convolution.cu b/test/cnn_convolution.cu
index 049657b..9716e62 100644
--- a/test/cnn_convolution.cu
+++ b/test/cnn_convolution.cu
@@ -107,13 +107,25 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
    // bias[kernel->columns][dim_output][dim_output]
    kernel->bias = create_matrix(kernel->columns, output_dim, output_dim, 15.0f);
    kernel->d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f);
+    #ifdef ADAM_CNN_BIAS
+    kernel->s_d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f);
+    kernel->v_d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f);
+    #endif

    // weights[rows][columns][k_size][k_size]
    kernel->weights = (float****)nalloc(kernel->rows, sizeof(float***));
    kernel->d_weights = (float****)nalloc(kernel->rows, sizeof(float***));
+    #ifdef ADAM_CNN_WEIGHTS
+    kernel->s_d_weights = (float****)nalloc(kernel->rows, sizeof(float***));
+    kernel->v_d_weights = (float****)nalloc(kernel->rows, sizeof(float***));
+    #endif
    for (int i=0; i < kernel->rows; i++) {
        kernel->weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 15.0f);
        kernel->d_weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
+        #ifdef ADAM_CNN_WEIGHTS
+        kernel->s_d_weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
+        kernel->v_d_weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
+        #endif
    }

    float*** input = create_matrix(kernel->rows, input_dim, input_dim, 5.0f);
@@ -152,13 +164,25 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)

    free_matrix(kernel->bias, kernel->columns, output_dim);
    free_matrix(kernel->d_bias, kernel->columns, output_dim);
+    #ifdef ADAM_CNN_BIAS
+    free_matrix(kernel->s_d_bias, kernel->columns, output_dim);
+    free_matrix(kernel->v_d_bias, kernel->columns, output_dim);
+    #endif

    for (int i=0; i < kernel->rows; i++) {
        free_matrix(kernel->weights[i], kernel->columns, kernel->k_size);
        free_matrix(kernel->d_weights[i], kernel->columns, kernel->k_size);
+        #ifdef ADAM_CNN_WEIGHTS
+        free_matrix(kernel->s_d_weights[i], kernel->columns, kernel->k_size);
+        free_matrix(kernel->v_d_weights[i], kernel->columns, kernel->k_size);
+        #endif
    }
    gree(kernel->weights);
    gree(kernel->d_weights);
+    #ifdef ADAM_CNN_WEIGHTS
+    gree(kernel->s_d_weights);
+    gree(kernel->v_d_weights);
+    #endif

    free_matrix(input, kernel->rows, input_dim);
    free_matrix(output_cpu, kernel->columns, output_dim);
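Two practical consequences of gating the new fields at the preprocessor level, both visible in the hunks above: enabling or disabling any ADAM_* flag changes the layout of Kernel_cnn and Kernel_nn, so the whole project must be recompiled rather than relinked; and because neuron_io.c neither writes nor reads the moment buffers, saved network files remain byte-compatible whichever flags are set.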