Back to multiple bias implementation

julienChemillier 2023-03-18 13:25:58 +01:00
parent 00ac29b1d0
commit 1bd92074ab
12 changed files with 95 additions and 41 deletions

View File

@@ -146,11 +146,10 @@ void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, fl
 void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, ptr d_function, int is_first) {
     // Bias
-    int n = dim_output*dim_output;
     for (int i=0; i < depth_output; i++) {
         for (int j=0; j < dim_output; j++) {
             for (int k=0; k < dim_output; k++) {
-                ker->d_bias[i] += output[i][j][k]/n;
+                ker->d_bias[i][j][k] += output[i][j][k];
             }
         }
     }
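Note on this hunk: with one bias per output pixel, the gradient of the loss with respect to bias[i][j][k] is exactly the upstream derivative output[i][j][k], so the old averaging over n = dim_output*dim_output positions per channel disappears. A minimal standalone sketch of the new accumulation (hypothetical helper name, plain arrays instead of the repo's Kernel_cnn):

    // Accumulate dL/db for untied biases: one gradient cell per output pixel.
    void accumulate_d_bias(float*** d_bias, float*** delta, int depth_output, int dim_output) {
        for (int i=0; i < depth_output; i++) {          // output channel
            for (int j=0; j < dim_output; j++) {        // output row
                for (int k=0; k < dim_output; k++) {    // output column
                    d_bias[i][j][k] += delta[i][j][k];  // dL/db = dL/dy when y = conv + b
                }
            }
        }
    }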

View File

@@ -20,7 +20,7 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
     for (int i=0; i < kernel->columns; i++) { // filter
         for (int j=0; j < output_dim; j++) { // output row
             for (int k=0; k < output_dim; k++) { // output column
-                f = kernel->bias[i];
+                f = kernel->bias[i][j][k];
                 for (int a=0; a < kernel->rows; a++) { // color channel
                     for (int b=0; b < kernel->k_size; b++) { // filter row
                         for (int c=0; c < kernel->k_size; c++) { // filter column
@@ -46,7 +46,7 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
         return;
     }
-    float f = kernel->bias[idx];
+    float f = kernel->bias[idx][idy][idz];
     for (int a=0; a < kernel->rows; a++) {
         for (int b=0; b < kernel->k_size; b++) {

View File

@@ -20,7 +20,7 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
     for (int i=0; i < kernel->columns; i++) { // filter
         for (int j=0; j < output_dim; j++) { // output row
             for (int k=0; k < output_dim; k++) { // output column
-                f = kernel->bias[i];
+                f = kernel->bias[i][j][k];
                 for (int a=0; a < kernel->rows; a++) { // color channel
                     for (int b=0; b < kernel->k_size; b++) { // filter row
                         for (int c=0; c < kernel->k_size; c++) { // filter column
@@ -46,7 +46,7 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
         return;
     }
-    float f = kernel->bias[idx];
+    float f = kernel->bias[idx][idy][idz];
     for (int a=0; a < kernel->rows; a++) {
         for (int b=0; b < kernel->k_size; b++) {
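This hunk appears twice because the CPU source and its CUDA counterpart keep identical copies of the convolution. For reference, a self-contained sketch of the forward pass with untied biases, assuming the struct's weights[rows][columns][k_size][k_size] layout, a valid convolution and stride 1 (plain arrays, not the repo's Kernel_cnn):

    void conv_forward_sketch(float*** input, float**** weights, float*** bias,
                             float*** output, int rows, int columns,
                             int k_size, int output_dim) {
        for (int i=0; i < columns; i++) {             // filter / output channel
            for (int j=0; j < output_dim; j++) {      // output row
                for (int k=0; k < output_dim; k++) {  // output column
                    float f = bias[i][j][k];          // per-pixel bias, no longer bias[i]
                    for (int a=0; a < rows; a++) {            // input channel
                        for (int b=0; b < k_size; b++) {      // filter row
                            for (int c=0; c < k_size; c++) {  // filter column
                                f += weights[a][i][b][c] * input[a][j+b][k+c];
                            }
                        }
                    }
                    output[i][j][k] = f;
                }
            }
        }
    }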

View File

@@ -180,14 +180,22 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act
             }
         }
     }
-    cnn->bias = (float*)nalloc(depth_output, sizeof(float));
-    cnn->d_bias = (float*)nalloc(depth_output, sizeof(float));
+    cnn->bias = (float***)nalloc(depth_output, sizeof(float**));
+    cnn->d_bias = (float***)nalloc(depth_output, sizeof(float**));
     for (int i=0; i < depth_output; i++) {
-        cnn->d_bias[i] = 0;
+        cnn->bias[i] = (float**)nalloc(bias_size, sizeof(float*));
+        cnn->d_bias[i] = (float**)nalloc(bias_size, sizeof(float*));
+        for (int j=0; j < bias_size; j++) {
+            cnn->bias[i][j] = (float*)nalloc(bias_size, sizeof(float));
+            cnn->d_bias[i][j] = (float*)nalloc(bias_size, sizeof(float));
+            for (int k=0; k < bias_size; k++) {
+                cnn->d_bias[i][j][k] = 0.;
+            }
+        }
     }
-    int n_in = kernel_size*kernel_size;
+    int n_in = network->width[n-1]*network->width[n-1]*network->depth[n-1];
     int n_out = network->width[n]*network->width[n]*network->depth[n];
-    initialisation_1d_matrix(network->initialisation, cnn->bias, depth_output, n_in, n_out);
+    initialisation_3d_matrix(network->initialisation, cnn->bias, depth_output, dim_output, dim_output, n_in, n_out);
     initialisation_4d_matrix(network->initialisation, cnn->weights, depth_input, depth_output, kernel_size, kernel_size, n_in, n_out);
     create_a_cube_input_layer(network, n, depth_output, bias_size);
     create_a_cube_input_z_layer(network, n, depth_output, bias_size);
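Two things change here: bias becomes a [depth_output][bias_size][bias_size] tensor allocated level by level, and the fan-in n_in used for initialisation switches from kernel_size*kernel_size to the full input volume width*width*depth, the usual convention for Xavier-style schemes. The same triple-nalloc shape is rebuilt below in read_kernel and copy_network; a hypothetical helper could factor it out:

    // Sketch only: allocate a depth x dim x dim bias tensor with the repo's nalloc().
    float*** alloc_bias_3d(int depth, int dim) {
        float*** bias = (float***)nalloc(depth, sizeof(float**));
        for (int i=0; i < depth; i++) {
            bias[i] = (float**)nalloc(dim, sizeof(float*));
            for (int j=0; j < dim; j++) {
                bias[i][j] = (float*)nalloc(dim, sizeof(float));
            }
        }
        return bias;
    }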

View File

@@ -36,7 +36,16 @@ void free_convolution(Network* network, int pos) {
     int c = k_pos->columns;
     int k_size = k_pos->k_size;
     int r = k_pos->rows;
+    int bias_size = network->width[pos+1]; // Not sure of the value
     free_a_cube_input_layer(network, pos+1, network->depth[pos+1], network->width[pos+1]);
+    for (int i=0; i < c; i++) {
+        for (int j=0; j < bias_size; j++) {
+            gree(k_pos->bias[i][j]);
+            gree(k_pos->d_bias[i][j]);
+        }
+        gree(k_pos->bias[i]);
+        gree(k_pos->d_bias[i]);
+    }
     gree(k_pos->bias);
     gree(k_pos->d_bias);
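Teardown must mirror the allocation exactly, including the same bias_size = network->width[pos+1] used for the inner dimensions. A sketch of the matching free, with the repo's gree(), as a hypothetical helper:

    // Sketch only: release a depth x dim x dim tensor in reverse allocation order.
    void free_bias_3d(float*** bias, int depth, int dim) {
        for (int i=0; i < depth; i++) {
            for (int j=0; j < dim; j++) {
                gree(bias[i][j]);
            }
            gree(bias[i]);
        }
        gree(bias);
    }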

View File

@@ -13,8 +13,8 @@ typedef struct Kernel_cnn {
     int k_size; // k_size = dim_input - dim_output + 1
     int rows; // input depth
     int columns; // output depth
-    float* bias; // bias[columns]
-    float* d_bias; // d_bias[columns]
+    float*** bias; // bias[columns][dim_output][dim_output]
+    float*** d_bias; // d_bias[columns][dim_output][dim_output]
     float**** weights; // weights[rows][columns][k_size][k_size]
     float**** d_weights; // d_weights[rows][columns][k_size][k_size]
 } Kernel_cnn;
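Worth noting: untying the bias multiplies its parameter count by dim_output*dim_output. With hypothetical sizes columns = 4 and dim_output = 24, a layer goes from 4 bias parameters to 4 * 24 * 24 = 2304.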

View File

@@ -91,7 +91,7 @@ void write_couche(Network* network, int indice_couche, int type_couche, FILE* pt
     float buffer[output_dim*output_dim];
     for (int j=0; j < output_dim; j++) {
         for (int k=0; k < output_dim; k++) {
-            bufferAdd(cnn->bias[i]);
+            bufferAdd(cnn->bias[i][j][k]);
         }
     }
     fwrite(buffer, sizeof(buffer), 1, ptr);
@@ -247,14 +247,18 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) {
     Kernel_cnn* cnn = kernel->cnn;
     float tmp;
-    cnn->bias = (float*)nalloc(cnn->columns, sizeof(float));
-    cnn->d_bias = (float*)nalloc(cnn->columns, sizeof(float));
+    cnn->bias = (float***)nalloc(cnn->columns, sizeof(float**));
+    cnn->d_bias = (float***)nalloc(cnn->columns, sizeof(float**));
     for (int i=0; i < cnn->columns; i++) {
+        cnn->bias[i] = (float**)nalloc(output_dim, sizeof(float*));
+        cnn->d_bias[i] = (float**)nalloc(output_dim, sizeof(float*));
         for (int j=0; j < output_dim; j++) {
+            cnn->bias[i][j] = (float*)nalloc(output_dim, sizeof(float));
+            cnn->d_bias[i][j] = (float*)nalloc(output_dim, sizeof(float));
             for (int k=0; k < output_dim; k++) {
                 (void) !fread(&tmp, sizeof(tmp), 1, ptr);
-                cnn->bias[i] = tmp;
-                cnn->d_bias[i] = 0.;
+                cnn->bias[i][j][k] = tmp;
+                cnn->d_bias[i][j][k] = 0.;
             }
         }
     }
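This changes the serialized format: write_couche now emits output_dim*output_dim floats per output channel, row-major in (j, k), and read_kernel consumes them in the same order, so save files written before this commit are no longer loadable. A simplified sketch of the reader's ordering (error handling elided):

    // The reader must follow the writer's exact (i, j, k) nesting.
    for (int i=0; i < cnn->columns; i++)
        for (int j=0; j < output_dim; j++)
            for (int k=0; k < output_dim; k++)
                (void) !fread(&cnn->bias[i][j][k], sizeof(float), 1, ptr);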

View File

@@ -18,7 +18,7 @@ void print_kernel_cnn(Kernel_cnn* ker, int depth_input, int dim_input, int depth
     for (int i=0; i<depth_output; i++) {
         for (int j=0; j<dim_output; j++) {
             for (int k=0; k<dim_output; k++) {
-                printf("%.2f", ker->bias[i]);
+                printf("%.2f", ker->bias[i][j][k]);
             }
             print_space;
         }

View File

@@ -348,7 +348,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di
         #endif
         write_network(out, network);
         // If you want to test the network between each epoch, uncomment the following line:
-        //test_network(0, out, "data/mnist/t10k-images-idx3-ubyte", "data/mnist/t10k-labels-idx1-ubyte", NULL, false);
+        test_network(0, out, "data/mnist/t10k-images-idx3-ubyte", "data/mnist/t10k-labels-idx1-ubyte", NULL, false);
         // Learning Rate decay
         network->learning_rate -= LEARNING_RATE*(1./(float)(epochs+1));
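Side note on the decay line: assuming epochs is the total epoch count, each epoch subtracts the constant LEARNING_RATE/(epochs+1), a linear decay. With a hypothetical LEARNING_RATE of 0.1 and epochs = 9, every epoch removes 0.1/10 = 0.01, so the rate ends at 0.1 - 9*0.01 = 0.01, i.e. LEARNING_RATE/(epochs+1).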

View File

@@ -87,9 +87,14 @@ void update_bias(Network* network, Network* d_network) {
             Kernel_cnn* d_cnn = dk_i->cnn;
             for (int a=0; a < output_depth; a++) {
-                cnn->bias[a] -= network->learning_rate * d_cnn->d_bias[a];
-                d_cnn->d_bias[a] = 0;
-                cnn->bias[a] = clip(cnn->bias[a]);
+                for (int b=0; b < output_width; b++) {
+                    for (int c=0; c < output_width; c++) {
+                        cnn->bias[a][b][c] -= network->learning_rate * d_cnn->d_bias[a][b][c];
+                        d_cnn->d_bias[a][b][c] = 0;
+                        cnn->bias[a][b][c] = clip(cnn->bias[a][b][c]);
+                    }
+                }
             }
         } else if (k_i->nn) { // Full connection
             Kernel_nn* nn = k_i->nn;
@@ -172,7 +177,11 @@ void reset_d_bias(Network* network) {
             Kernel_cnn* cnn = k_i_1->cnn;
             for (int a=0; a < output_depth; a++) {
-                cnn->d_bias[a] = 0;
+                for (int b=0; b < output_width; b++) {
+                    for (int c=0; c < output_width; c++) {
+                        cnn->d_bias[a][b][c] = 0;
+                    }
+                }
             }
         } else if (k_i->nn) { // Full connection
             Kernel_nn* nn = k_i_1->nn;
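update_bias and reset_d_bias now walk the same (channel, row, col) grid. A sketch of the update step factored into a helper (clip() is the repo's existing clamp; the helper itself is hypothetical):

    static void sgd_step_3d(float*** param, float*** grad, int depth, int dim, float lr) {
        for (int a=0; a < depth; a++) {
            for (int b=0; b < dim; b++) {
                for (int c=0; c < dim; c++) {
                    param[a][b][c] -= lr * grad[a][b][c];  // gradient descent step
                    grad[a][b][c] = 0;                     // reset accumulator for the next batch
                    param[a][b][c] = clip(param[a][b][c]); // clamp to the allowed range
                }
            }
        }
    }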

View File

@@ -33,6 +33,7 @@ void knuth_shuffle(int* tab, int n) {
 }
 bool equals_networks(Network* network1, Network* network2) {
+    int output_dim;
     checkEquals(size, "size", -1);
     checkEquals(initialisation, "initialisation", -1);
     checkEquals(dropout, "dropout", -1);
@@ -67,17 +68,22 @@ bool equals_networks(Network* network1, Network* network2) {
         }
     } else {
         // CNN case
+        output_dim = network1->width[i+1];
         checkEquals(kernel[i]->cnn->k_size, "kernel[i]->k_size", i);
         checkEquals(kernel[i]->cnn->rows, "kernel[i]->rows", i);
         checkEquals(kernel[i]->cnn->columns, "kernel[i]->columns", i);
         for (int j=0; j < network1->kernel[i]->cnn->columns; j++) {
-            checkEquals(kernel[i]->cnn->bias[j], "kernel[i]->cnn->bias[j]", j);
+            for (int k=0; k < output_dim; k++) {
+                for (int l=0; l < output_dim; l++) {
+                    checkEquals(kernel[i]->cnn->bias[j][k][l], "kernel[i]->cnn->bias[j][k][l]", l);
+                }
+            }
         }
         for (int j=0; j < network1->kernel[i]->cnn->rows; j++) {
             for (int k=0; k < network1->kernel[i]->cnn->columns; k++) {
                 for (int l=0; l < network1->kernel[i]->cnn->k_size; l++) {
                     for (int m=0; m < network1->kernel[i]->cnn->k_size; m++) {
                         checkEquals(kernel[i]->cnn->weights[j][k][l][m], "kernel[i]->cnn->weights[j][k][l][m]", m);
                     }
                 }
             }
@@ -100,6 +106,7 @@ Network* copy_network(Network* network) {
     int rows;
     int k_size;
     int columns;
+    int output_dim;
     copyVar(dropout);
     copyVar(learning_rate);
@@ -165,6 +172,8 @@ Network* copy_network(Network* network) {
     rows = network->kernel[i]->cnn->rows;
     k_size = network->kernel[i]->cnn->k_size;
     columns = network->kernel[i]->cnn->columns;
+    output_dim = network->width[i+1];
     network_cp->kernel[i]->nn = NULL;
     network_cp->kernel[i]->cnn = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn));
@@ -173,11 +182,19 @@ Network* copy_network(Network* network) {
     copyVar(kernel[i]->cnn->k_size);
     copyVar(kernel[i]->cnn->columns);
-    network_cp->kernel[i]->cnn->bias = (float*)nalloc(columns, sizeof(float));
-    network_cp->kernel[i]->cnn->d_bias = (float*)nalloc(columns, sizeof(float));
+    network_cp->kernel[i]->cnn->bias = (float***)nalloc(columns, sizeof(float**));
+    network_cp->kernel[i]->cnn->d_bias = (float***)nalloc(columns, sizeof(float**));
     for (int j=0; j < columns; j++) {
-        copyVar(kernel[i]->cnn->bias[j]);
-        network_cp->kernel[i]->cnn->d_bias[j] = 0.;
+        network_cp->kernel[i]->cnn->bias[j] = (float**)nalloc(output_dim, sizeof(float*));
+        network_cp->kernel[i]->cnn->d_bias[j] = (float**)nalloc(output_dim, sizeof(float*));
+        for (int k=0; k < output_dim; k++) {
+            network_cp->kernel[i]->cnn->bias[j][k] = (float*)nalloc(output_dim, sizeof(float));
+            network_cp->kernel[i]->cnn->d_bias[j][k] = (float*)nalloc(output_dim, sizeof(float));
+            for (int l=0; l < output_dim; l++) {
+                copyVar(kernel[i]->cnn->bias[j][k][l]);
+                network_cp->kernel[i]->cnn->d_bias[j][k][l] = 0.;
+            }
+        }
     }
     network_cp->kernel[i]->cnn->weights = (float****)nalloc(rows, sizeof(float***));
@@ -243,6 +260,7 @@ void copy_network_parameters(Network* network_src, Network* network_dest) {
     int rows;
     int k_size;
     int columns;
+    int output_dim;
     copyVarParams(learning_rate);
@@ -266,9 +284,14 @@ void copy_network_parameters(Network* network_src, Network* network_dest) {
     rows = network_src->kernel[i]->cnn->rows;
     k_size = network_src->kernel[i]->cnn->k_size;
     columns = network_src->kernel[i]->cnn->columns;
+    output_dim = network_src->width[i+1];
     for (int j=0; j < columns; j++) {
-        copyVarParams(kernel[i]->cnn->bias[j]);
+        for (int k=0; k < output_dim; k++) {
+            for (int l=0; l < output_dim; l++) {
+                copyVarParams(kernel[i]->cnn->bias[j][k][l]);
+            }
+        }
     }
     for (int j=0; j < rows; j++) {
         for (int k=0; k < columns; k++) {
@@ -298,6 +321,7 @@ int count_null_weights(Network* network) {
     int rows;
     int k_size;
     int columns;
+    int output_dim;
     for (int i=0; i < size-1; i++) {
         if (!network->kernel[i]->cnn && network->kernel[i]->nn) { // NN case
@@ -319,9 +343,14 @@ int count_null_weights(Network* network) {
     rows = network->kernel[i]->cnn->rows;
     k_size = network->kernel[i]->cnn->k_size;
     columns = network->kernel[i]->cnn->columns;
+    output_dim = network->width[i+1];
     for (int j=0; j < columns; j++) {
-        null_bias += fabs(network->kernel[i]->cnn->bias[j]) <= epsilon;
+        for (int k=0; k < output_dim; k++) {
+            for (int l=0; l < output_dim; l++) {
+                null_bias += fabs(network->kernel[i]->cnn->bias[j][k][l]) <= epsilon;
+            }
+        }
     }
     for (int j=0; j < rows; j++) {
         for (int k=0; k < columns; k++) {
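The same (channel, row, col) walk now appears in equals_networks, copy_network, copy_network_parameters and count_null_weights; a hypothetical macro could keep the four call sites in sync:

    #define FOR_EACH_BIAS(depth, dim, body)   \
        for (int j=0; j < (depth); j++)       \
            for (int k=0; k < (dim); k++)     \
                for (int l=0; l < (dim); l++) { body }

    // e.g. in count_null_weights:
    // FOR_EACH_BIAS(columns, output_dim,
    //     null_bias += fabs(network->kernel[i]->cnn->bias[j][k][l]) <= epsilon;)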

View File

@@ -104,13 +104,9 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
     kernel->rows = rows;
     kernel->columns = columns;
-    // bias[kernel->columns]
-    kernel->bias = (float*)nalloc(kernel->columns, sizeof(float));
-    kernel->d_bias = (float*)nalloc(kernel->columns, sizeof(float));
-    for (int i=0; i<kernel->columns; i++) {
-        kernel->bias[i] = random_float(0.0f, 15.0f);
-        kernel->d_bias[i] = random_float(0.0f, 1.5f);
-    }
+    // bias[kernel->columns][dim_output][dim_output]
+    kernel->bias = create_matrix(kernel->columns, output_dim, output_dim, 15.0f);
+    kernel->d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f);
     // weights[rows][columns][k_size][k_size]
     kernel->weights = (float****)nalloc(kernel->rows, sizeof(float***));
@@ -154,8 +150,8 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
     }
     printf(GREEN "OK\n" RESET);
-    gree(kernel->bias);
-    gree(kernel->d_bias);
+    free_matrix(kernel->bias, kernel->columns, output_dim);
+    free_matrix(kernel->d_bias, kernel->columns, output_dim);
     for (int i=0; i < kernel->rows; i++) {
         free_matrix(kernel->weights[i], kernel->columns, kernel->k_size);
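The test now leans on create_matrix/free_matrix instead of hand-rolled loops. Judging only from the call sites, their shape is roughly as follows (a sketch; the repo's actual definitions may differ):

    float*** create_matrix(int n, int p, int q, float max_value) {
        float*** matrix = (float***)nalloc(n, sizeof(float**));
        for (int i=0; i < n; i++) {
            matrix[i] = (float**)nalloc(p, sizeof(float*));
            for (int j=0; j < p; j++) {
                matrix[i][j] = (float*)nalloc(q, sizeof(float));
                for (int k=0; k < q; k++) {
                    matrix[i][j][k] = random_float(0.0f, max_value); // uniform fill
                }
            }
        }
        return matrix;
    }

    void free_matrix(float*** matrix, int n, int p) {
        for (int i=0; i < n; i++) {
            for (int j=0; j < p; j++) {
                gree(matrix[i][j]);
            }
            gree(matrix[i]);
        }
        gree(matrix);
    }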