Change notation toward a consensus

Author: julienChemillier
Date:   2023-05-13 17:22:47 +02:00
Parent: 521a1bb729
Commit: c0808b9240
22 changed files with 371 additions and 371 deletions
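
The renames follow one consistent `<tensor>_<dimension>` pattern: `depth_input`/`dim_input` become `input_depth`/`input_width`, `depth_output`/`dim_output` become `output_depth`/`output_width`, and `input_dim`/`output_dim` become `input_width`/`output_width`. A minimal before/after sketch of one touched signature, taken from the hunks below (shown here only to summarise the convention):

```c
/* Before */
void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output,
                              int depth_input, int dim_input, int depth_output, int dim_output,
                              int activation, int is_first);

/* After */
void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output,
                              int input_depth, int input_width, int output_depth, int output_width,
                              int activation, int is_first);
```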

View File

@@ -173,8 +173,8 @@ Résultats pour un réseau assez conséquent, avec des images de 256x256 pixels:
 <details>
 ```c
-Network* create_large_network(float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth) {
-    Network* network = create_network(16, learning_rate, dropout, activation, initialisation, input_dim, input_depth);
+Network* create_large_network(float learning_rate, int dropout, int activation, int initialisation, int input_width, int input_depth) {
+    Network* network = create_network(16, learning_rate, dropout, activation, initialisation, input_width, input_depth);
     add_convolution(network, 6, 258, activation);
     add_convolution(network, 16, 256, activation);
     add_average_pooling(network, 64);

View File

@@ -358,16 +358,16 @@ void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output,
  * Backward linearisation
  */
 #ifdef __CUDACC__
-__global__ void backward_linearisation_kernel_1(Kernel_nn* ker, float*** input, float* output, int depth_input, int dim_input, int size_output) {
-    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth_input
-    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < dim_input
-    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < dim_input
-    if (idx >= depth_input || idy >= dim_input || idz >= dim_input) {
+__global__ void backward_linearisation_kernel_1(Kernel_nn* ker, float*** input, float* output, int input_depth, int input_width, int size_output) {
+    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < input_depth
+    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < input_width
+    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < input_width
+    if (idx >= input_depth || idy >= input_width || idz >= input_width) {
         return;
     }
-    int id = idx*dim_input*dim_input + idy*dim_input + idz;
+    int id = idx*input_width*input_width + idy*input_width + idz;
     for (int j=0; j < size_output; j++) {
         ker->d_weights[id][j] += input[idx][idy][idz]*output[j];
@@ -379,15 +379,15 @@ __global__ void backward_linearisation_kernel_1(Kernel_nn* ker, float*** input,
     }
 }
-__global__ void backward_linearisation_kernel_2(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, funcPtr d_f) {
-    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth_input
-    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < dim_input
-    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < dim_input
-    if (idx >= depth_input || idy >= dim_input || idz >= dim_input) {
+__global__ void backward_linearisation_kernel_2(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, funcPtr d_f) {
+    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < input_depth
+    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < input_width
+    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < input_width
+    if (idx >= input_depth || idy >= input_width || idz >= input_width) {
         return;
     }
-    int id = idx*dim_input*dim_input + idy*dim_input + idz;
+    int id = idx*input_width*input_width + idy*input_width + idz;
     float tmp=0;
     for (int j=0; j < size_output; j++) {
@@ -396,12 +396,12 @@ __global__ void backward_linearisation_kernel_2(Kernel_nn* ker, float*** input,
     input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) );
 }
-void backward_linearisation_device(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, int activation) {
+void backward_linearisation_device(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, int activation) {
     // Make computation
-    dim3 gridSize(i_div_up(depth_input, BLOCKSIZE_x), i_div_up(dim_input, BLOCKSIZE_y), i_div_up(dim_input, BLOCKSIZE_y));
+    dim3 gridSize(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(input_width, BLOCKSIZE_y), i_div_up(input_width, BLOCKSIZE_y));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
-    backward_linearisation_kernel_1<<<gridSize, blockSize>>>(ker, input, output, depth_input, dim_input, size_output);
+    backward_linearisation_kernel_1<<<gridSize, blockSize>>>(ker, input, output, input_depth, input_width, size_output);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
@@ -409,14 +409,14 @@ void backward_linearisation_device(Kernel_nn* ker, float*** input, float*** inpu
     // Second kernel
     funcPtr d_function = get_activation_function_cuda(activation);
-    backward_linearisation_kernel_2<<<gridSize, blockSize>>>(ker, input, input_z, output, depth_input, dim_input, size_output, d_function);
+    backward_linearisation_kernel_2<<<gridSize, blockSize>>>(ker, input, input_z, output, input_depth, input_width, size_output, d_function);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }
 #endif
-void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, int activation) {
+void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, int activation) {
     funcPtr d_function = get_activation_function(activation);
@@ -427,9 +427,9 @@ void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z
     // Weights
     int cpt = 0;
-    for (int i=0; i < depth_input; i++) {
-        for (int k=0; k < dim_input; k++) {
-            for (int l=0; l < dim_input; l++) {
+    for (int i=0; i < input_depth; i++) {
+        for (int k=0; k < input_width; k++) {
+            for (int l=0; l < input_width; l++) {
                 for (int j=0; j < size_output; j++) {
                     ker->d_weights[cpt][j] += input[i][k][l]*output[j];
                 }
@@ -440,9 +440,9 @@ void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z
     // Input
     cpt = 0;
-    for (int i=0; i < depth_input; i++) {
-        for (int k=0; k < dim_input; k++) {
-            for (int l=0; l < dim_input; l++) {
+    for (int i=0; i < input_depth; i++) {
+        for (int k=0; k < input_width; k++) {
+            for (int l=0; l < input_width; l++) {
                 float tmp=0;
                 for (int j=0; j < size_output; j++) {
                     tmp += output[j]*ker->weights[cpt][j];
@@ -457,11 +457,11 @@ void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z
 #ifdef __CUDACC__
 extern "C"
 #endif
-void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, int activation) {
+void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, int activation) {
 #ifndef __CUDACC__
-    backward_linearisation_cpu(ker, input, input_z, output, depth_input, dim_input, size_output, activation);
+    backward_linearisation_cpu(ker, input, input_z, output, input_depth, input_width, size_output, activation);
 #else
-    backward_linearisation_device(ker, input, input_z, output, depth_input, dim_input, size_output, activation);
+    backward_linearisation_device(ker, input, input_z, output, input_depth, input_width, size_output, activation);
 #endif
 }
@@ -469,18 +469,18 @@ void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, fl
  * Backward convolution
  */
 #ifdef __CUDACC__
-__global__ void backward_convolution_dbias_kernel(Kernel_cnn* ker, float*** output, int depth_output, int dim_output) {
+__global__ void backward_convolution_dbias_kernel(Kernel_cnn* ker, float*** output, int output_depth, int output_width) {
     int idx = threadIdx.x + blockDim.x*blockIdx.x;
     int idy = threadIdx.y + blockDim.y*blockIdx.y;
     int idz = threadIdx.z + blockDim.z*blockIdx.z;
-    if (idx >= depth_output || idy >= dim_output || idz >= dim_output) {
+    if (idx >= output_depth || idy >= output_width || idz >= output_width) {
         return;
     }
     ker->d_bias[idx][idy][idz] += output[idx][idy][idz];
 }
-__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int depth_input, int depth_output, int dim_output, int k_size) {
+__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int k_size) {
     int idx = threadIdx.x + blockDim.x*blockIdx.x;
     int idy = threadIdx.y + blockDim.y*blockIdx.y;
     int idz = threadIdx.z + blockDim.z*blockIdx.z;
@@ -488,35 +488,35 @@ __global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** in
     int idz1 = idz / k_size;
     int idz2 = idz % k_size;
-    if (idx >= depth_input || idy >= depth_output || idz1 >= k_size || idz2 >= k_size) {
+    if (idx >= input_depth || idy >= output_depth || idz1 >= k_size || idz2 >= k_size) {
         return;
     }
     float tmp = 0;
-    for (int l=0; l < dim_output; l++) {
-        for (int m=0; m < dim_output; m++) {
+    for (int l=0; l < output_width; l++) {
+        for (int m=0; m < output_width; m++) {
             tmp += input[idx][l+idz1][m+idz2]*output[idy][l][m];
         }
     }
     ker->d_weights[idx][idy][idz1][idz2] += tmp;
 }
-__global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int k_size, funcPtr d_f) {
+__global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int k_size, funcPtr d_f) {
     int idx = threadIdx.x + blockDim.x*blockIdx.x;
     int idy = threadIdx.y + blockDim.y*blockIdx.y;
     int idz = threadIdx.z + blockDim.z*blockIdx.z;
-    if (idx >= depth_input || idy >= dim_input || idz >= dim_input) {
+    if (idx >= input_depth || idy >= input_width || idz >= input_width) {
         return;
     }
     int min_m, max_m, min_n, max_n;
     float tmp = 0;
-    for (int l=0; l < depth_output; l++) {
+    for (int l=0; l < output_depth; l++) {
         min_m = max(0, k_size-1-idy);
-        max_m = min(k_size, dim_input - idy);
+        max_m = min(k_size, input_width - idy);
         min_n = max(0, k_size-1-idz);
-        max_n = min(k_size, dim_input-idz);
+        max_n = min(k_size, input_width-idz);
         for (int m=min_m; m < max_m; m++) {
             for (int n=min_n; n < max_n; n++) {
                 tmp += output[l][idy-k_size+m+1][idz-k_size+n+1]*ker->weights[idx][l][m][n];
@@ -526,35 +526,35 @@ __global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float***
     input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) );
 }
-void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, int activation, int is_first) {
+void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
     // Bias Kernel
-    dim3 gridSize1(i_div_up(depth_output, BLOCKSIZE_x), i_div_up(dim_output, BLOCKSIZE_y), i_div_up(dim_output, BLOCKSIZE_y));
+    dim3 gridSize1(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_y));
     dim3 blockSize1(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
-    backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(ker, output, depth_output, dim_output);
+    backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(ker, output, output_depth, output_width);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
     // Weights Kernel
-    int k_size = dim_input - dim_output +1;
-    dim3 gridSize2(i_div_up(depth_input, BLOCKSIZE_x), i_div_up(depth_output, BLOCKSIZE_y), i_div_up(k_size*k_size, BLOCKSIZE_y));
+    int k_size = input_width - output_width +1;
+    dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(k_size*k_size, BLOCKSIZE_y));
     dim3 blockSize2(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
-    backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(ker, input, output, depth_input, depth_output, dim_output, k_size);
+    backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(ker, input, output, input_depth, output_depth, output_width, k_size);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
     // input propagation Kernel
     if (is_first != 1) {
-        dim3 gridSize3(i_div_up(depth_input, BLOCKSIZE_x), i_div_up(dim_input, BLOCKSIZE_y), i_div_up(dim_input, BLOCKSIZE_y));
+        dim3 gridSize3(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(input_width, BLOCKSIZE_y), i_div_up(input_width, BLOCKSIZE_y));
         dim3 blockSize3(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
         funcPtr d_function = get_activation_function_cuda(activation);
-        backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(ker, input, input_z, output, depth_input, dim_input, depth_output, k_size, d_function);
+        backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(ker, input, input_z, output, input_depth, input_width, output_depth, k_size, d_function);
         gpuErrchk( cudaPeekAtLastError() );
         gpuErrchk( cudaDeviceSynchronize() );
@@ -563,29 +563,29 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
 #endif
-void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, int activation, int is_first) {
+void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
     funcPtr d_function = get_activation_function(activation);
     // Bias
-    for (int i=0; i < depth_output; i++) {
-        for (int j=0; j < dim_output; j++) {
-            for (int k=0; k < dim_output; k++) {
+    for (int i=0; i < output_depth; i++) {
+        for (int j=0; j < output_width; j++) {
+            for (int k=0; k < output_width; k++) {
                 ker->d_bias[i][j][k] += output[i][j][k];
             }
         }
     }
     // Weights
-    int k_size = dim_input - dim_output +1;
-    for (int h=0; h < depth_input; h++) {
-        for (int i=0; i < depth_output; i++) {
+    int k_size = input_width - output_width +1;
+    for (int h=0; h < input_depth; h++) {
+        for (int i=0; i < output_depth; i++) {
             for (int j=0; j < k_size; j++) {
                 for (int k=0; k < k_size; k++) {
                     float tmp = 0;
-                    for (int l=0; l < dim_output; l++) {
-                        for (int m=0; m < dim_output; m++) {
+                    for (int l=0; l < output_width; l++) {
+                        for (int m=0; m < output_width; m++) {
                             tmp += input[h][l+j][m+k]*output[i][l][m];
                         }
                     }
@@ -599,15 +599,15 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
     if (is_first==1) // Pas besoin de backpropager dans l'input
         return;
     int min_m, max_m, min_n, max_n;
-    for (int i=0; i < depth_input; i++) {
-        for (int j=0; j < dim_input; j++) {
-            for (int k=0; k < dim_input; k++) {
+    for (int i=0; i < input_depth; i++) {
+        for (int j=0; j < input_width; j++) {
+            for (int k=0; k < input_width; k++) {
                 float tmp = 0;
-                for (int l=0; l < depth_output; l++) {
+                for (int l=0; l < output_depth; l++) {
                     min_m = max(0, k_size-1-j);
-                    max_m = min(k_size, dim_input - j);
+                    max_m = min(k_size, input_width - j);
                     min_n = max(0, k_size-1-k);
-                    max_n = min(k_size, dim_input-k);
+                    max_n = min(k_size, input_width-k);
                     for (int m=min_m; m < max_m; m++) {
                         for (int n=min_n; n < max_n; n++) {
                             tmp += output[l][j-k_size+m+1][k-k_size+n+1]*ker->weights[i][l][m][n];
@@ -623,10 +623,10 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
 #ifdef __CUDACC__
 extern "C"
 #endif
-void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, int activation, int is_first) {
+void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
 #ifndef __CUDACC__
-    backward_convolution_cpu(ker, input, input_z, output, depth_input, dim_input, depth_output, dim_output, activation, is_first);
+    backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first);
 #else
-    backward_convolution_device(ker, input, input_z, output, depth_input, dim_input, depth_output, dim_output, activation, is_first);
+    backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first);
 #endif
 }
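
As an aside, the flat index `id = idx*input_width*input_width + idy*input_width + idz` used by the linearisation kernels above is the usual row-major flattening of a `(depth, width, width)` cube. A small standalone check, with arbitrary illustrative sizes (not taken from the commit):

```c
#include <stdio.h>

int main(void) {
    int input_depth = 2, input_width = 3;
    // Same flattening as in backward_linearisation: id = idx*w*w + idy*w + idz
    int count = 0;
    for (int idx = 0; idx < input_depth; idx++)
        for (int idy = 0; idy < input_width; idy++)
            for (int idz = 0; idz < input_width; idz++) {
                int id = idx*input_width*input_width + idy*input_width + idz;
                if (id != count++) printf("mismatch at %d\n", id);
            }
    printf("flat indices are contiguous: 0..%d\n", count - 1);
    return 0;
}
```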

View File

@@ -358,16 +358,16 @@ void backward_dense(Kernel_nn* ker, float* input, float* input_z, float* output,
  * Backward linearisation
  */
 #ifdef __CUDACC__
-__global__ void backward_linearisation_kernel_1(Kernel_nn* ker, float*** input, float* output, int depth_input, int dim_input, int size_output) {
-    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth_input
-    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < dim_input
-    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < dim_input
-    if (idx >= depth_input || idy >= dim_input || idz >= dim_input) {
+__global__ void backward_linearisation_kernel_1(Kernel_nn* ker, float*** input, float* output, int input_depth, int input_width, int size_output) {
+    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < input_depth
+    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < input_width
+    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < input_width
+    if (idx >= input_depth || idy >= input_width || idz >= input_width) {
         return;
     }
-    int id = idx*dim_input*dim_input + idy*dim_input + idz;
+    int id = idx*input_width*input_width + idy*input_width + idz;
     for (int j=0; j < size_output; j++) {
         ker->d_weights[id][j] += input[idx][idy][idz]*output[j];
@@ -379,15 +379,15 @@ __global__ void backward_linearisation_kernel_1(Kernel_nn* ker, float*** input,
     }
 }
-__global__ void backward_linearisation_kernel_2(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, funcPtr d_f) {
-    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth_input
-    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < dim_input
-    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < dim_input
-    if (idx >= depth_input || idy >= dim_input || idz >= dim_input) {
+__global__ void backward_linearisation_kernel_2(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, funcPtr d_f) {
+    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < input_depth
+    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < input_width
+    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < input_width
+    if (idx >= input_depth || idy >= input_width || idz >= input_width) {
         return;
     }
-    int id = idx*dim_input*dim_input + idy*dim_input + idz;
+    int id = idx*input_width*input_width + idy*input_width + idz;
     float tmp=0;
     for (int j=0; j < size_output; j++) {
@@ -396,12 +396,12 @@ __global__ void backward_linearisation_kernel_2(Kernel_nn* ker, float*** input,
     input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) );
 }
-void backward_linearisation_device(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, int activation) {
+void backward_linearisation_device(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, int activation) {
     // Make computation
-    dim3 gridSize(i_div_up(depth_input, BLOCKSIZE_x), i_div_up(dim_input, BLOCKSIZE_y), i_div_up(dim_input, BLOCKSIZE_y));
+    dim3 gridSize(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(input_width, BLOCKSIZE_y), i_div_up(input_width, BLOCKSIZE_y));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
-    backward_linearisation_kernel_1<<<gridSize, blockSize>>>(ker, input, output, depth_input, dim_input, size_output);
+    backward_linearisation_kernel_1<<<gridSize, blockSize>>>(ker, input, output, input_depth, input_width, size_output);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
@@ -409,14 +409,14 @@ void backward_linearisation_device(Kernel_nn* ker, float*** input, float*** inpu
     // Second kernel
     funcPtr d_function = get_activation_function_cuda(activation);
-    backward_linearisation_kernel_2<<<gridSize, blockSize>>>(ker, input, input_z, output, depth_input, dim_input, size_output, d_function);
+    backward_linearisation_kernel_2<<<gridSize, blockSize>>>(ker, input, input_z, output, input_depth, input_width, size_output, d_function);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }
 #endif
-void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, int activation) {
+void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, int activation) {
     funcPtr d_function = get_activation_function(activation);
@@ -427,9 +427,9 @@ void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z
     // Weights
     int cpt = 0;
-    for (int i=0; i < depth_input; i++) {
-        for (int k=0; k < dim_input; k++) {
-            for (int l=0; l < dim_input; l++) {
+    for (int i=0; i < input_depth; i++) {
+        for (int k=0; k < input_width; k++) {
+            for (int l=0; l < input_width; l++) {
                 for (int j=0; j < size_output; j++) {
                     ker->d_weights[cpt][j] += input[i][k][l]*output[j];
                 }
@@ -440,9 +440,9 @@ void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z
     // Input
     cpt = 0;
-    for (int i=0; i < depth_input; i++) {
-        for (int k=0; k < dim_input; k++) {
-            for (int l=0; l < dim_input; l++) {
+    for (int i=0; i < input_depth; i++) {
+        for (int k=0; k < input_width; k++) {
+            for (int l=0; l < input_width; l++) {
                 float tmp=0;
                 for (int j=0; j < size_output; j++) {
                     tmp += output[j]*ker->weights[cpt][j];
@@ -457,11 +457,11 @@ void backward_linearisation_cpu(Kernel_nn* ker, float*** input, float*** input_z
 #ifdef __CUDACC__
 extern "C"
 #endif
-void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, int activation) {
+void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, int activation) {
 #ifndef __CUDACC__
-    backward_linearisation_cpu(ker, input, input_z, output, depth_input, dim_input, size_output, activation);
+    backward_linearisation_cpu(ker, input, input_z, output, input_depth, input_width, size_output, activation);
 #else
-    backward_linearisation_device(ker, input, input_z, output, depth_input, dim_input, size_output, activation);
+    backward_linearisation_device(ker, input, input_z, output, input_depth, input_width, size_output, activation);
 #endif
 }
@@ -469,18 +469,18 @@ void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, fl
  * Backward convolution
  */
 #ifdef __CUDACC__
-__global__ void backward_convolution_dbias_kernel(Kernel_cnn* ker, float*** output, int depth_output, int dim_output) {
+__global__ void backward_convolution_dbias_kernel(Kernel_cnn* ker, float*** output, int output_depth, int output_width) {
     int idx = threadIdx.x + blockDim.x*blockIdx.x;
     int idy = threadIdx.y + blockDim.y*blockIdx.y;
     int idz = threadIdx.z + blockDim.z*blockIdx.z;
-    if (idx >= depth_output || idy >= dim_output || idz >= dim_output) {
+    if (idx >= output_depth || idy >= output_width || idz >= output_width) {
         return;
     }
     ker->d_bias[idx][idy][idz] += output[idx][idy][idz];
 }
-__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int depth_input, int depth_output, int dim_output, int k_size) {
+__global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** input, float*** output, int input_depth, int output_depth, int output_width, int k_size) {
     int idx = threadIdx.x + blockDim.x*blockIdx.x;
     int idy = threadIdx.y + blockDim.y*blockIdx.y;
     int idz = threadIdx.z + blockDim.z*blockIdx.z;
@@ -488,35 +488,35 @@ __global__ void backward_convolution_dweight_kernel(Kernel_cnn* ker, float*** in
     int idz1 = idz / k_size;
     int idz2 = idz % k_size;
-    if (idx >= depth_input || idy >= depth_output || idz1 >= k_size || idz2 >= k_size) {
+    if (idx >= input_depth || idy >= output_depth || idz1 >= k_size || idz2 >= k_size) {
         return;
     }
     float tmp = 0;
-    for (int l=0; l < dim_output; l++) {
-        for (int m=0; m < dim_output; m++) {
+    for (int l=0; l < output_width; l++) {
+        for (int m=0; m < output_width; m++) {
             tmp += input[idx][l+idz1][m+idz2]*output[idy][l][m];
         }
     }
     ker->d_weights[idx][idy][idz1][idz2] += tmp;
 }
-__global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int k_size, funcPtr d_f) {
+__global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int k_size, funcPtr d_f) {
     int idx = threadIdx.x + blockDim.x*blockIdx.x;
     int idy = threadIdx.y + blockDim.y*blockIdx.y;
     int idz = threadIdx.z + blockDim.z*blockIdx.z;
-    if (idx >= depth_input || idy >= dim_input || idz >= dim_input) {
+    if (idx >= input_depth || idy >= input_width || idz >= input_width) {
         return;
     }
     int min_m, max_m, min_n, max_n;
     float tmp = 0;
-    for (int l=0; l < depth_output; l++) {
+    for (int l=0; l < output_depth; l++) {
         min_m = max(0, k_size-1-idy);
-        max_m = min(k_size, dim_input - idy);
+        max_m = min(k_size, input_width - idy);
         min_n = max(0, k_size-1-idz);
-        max_n = min(k_size, dim_input-idz);
+        max_n = min(k_size, input_width-idz);
         for (int m=min_m; m < max_m; m++) {
             for (int n=min_n; n < max_n; n++) {
                 tmp += output[l][idy-k_size+m+1][idz-k_size+n+1]*ker->weights[idx][l][m][n];
@@ -526,35 +526,35 @@ __global__ void backward_convolution_propagate_kernel(Kernel_cnn* ker, float***
     input[idx][idy][idz] = tmp*( (*d_f)(input_z[idx][idy][idz]) );
 }
-void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, int activation, int is_first) {
+void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
     // Bias Kernel
-    dim3 gridSize1(i_div_up(depth_output, BLOCKSIZE_x), i_div_up(dim_output, BLOCKSIZE_y), i_div_up(dim_output, BLOCKSIZE_y));
+    dim3 gridSize1(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_y));
     dim3 blockSize1(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
-    backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(ker, output, depth_output, dim_output);
+    backward_convolution_dbias_kernel<<<gridSize1, blockSize1>>>(ker, output, output_depth, output_width);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
     // Weights Kernel
-    int k_size = dim_input - dim_output +1;
-    dim3 gridSize2(i_div_up(depth_input, BLOCKSIZE_x), i_div_up(depth_output, BLOCKSIZE_y), i_div_up(k_size*k_size, BLOCKSIZE_y));
+    int k_size = input_width - output_width +1;
+    dim3 gridSize2(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(output_depth, BLOCKSIZE_y), i_div_up(k_size*k_size, BLOCKSIZE_y));
     dim3 blockSize2(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
-    backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(ker, input, output, depth_input, depth_output, dim_output, k_size);
+    backward_convolution_dweight_kernel<<<gridSize2, blockSize2>>>(ker, input, output, input_depth, output_depth, output_width, k_size);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
     // input propagation Kernel
     if (is_first != 1) {
-        dim3 gridSize3(i_div_up(depth_input, BLOCKSIZE_x), i_div_up(dim_input, BLOCKSIZE_y), i_div_up(dim_input, BLOCKSIZE_y));
+        dim3 gridSize3(i_div_up(input_depth, BLOCKSIZE_x), i_div_up(input_width, BLOCKSIZE_y), i_div_up(input_width, BLOCKSIZE_y));
         dim3 blockSize3(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
         funcPtr d_function = get_activation_function_cuda(activation);
-        backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(ker, input, input_z, output, depth_input, dim_input, depth_output, k_size, d_function);
+        backward_convolution_propagate_kernel<<<gridSize3, blockSize3>>>(ker, input, input_z, output, input_depth, input_width, output_depth, k_size, d_function);
         gpuErrchk( cudaPeekAtLastError() );
         gpuErrchk( cudaDeviceSynchronize() );
@@ -563,29 +563,29 @@ void backward_convolution_device(Kernel_cnn* ker, float*** input, float*** input
 #endif
-void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, int activation, int is_first) {
+void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
     funcPtr d_function = get_activation_function(activation);
     // Bias
-    for (int i=0; i < depth_output; i++) {
-        for (int j=0; j < dim_output; j++) {
-            for (int k=0; k < dim_output; k++) {
+    for (int i=0; i < output_depth; i++) {
+        for (int j=0; j < output_width; j++) {
+            for (int k=0; k < output_width; k++) {
                 ker->d_bias[i][j][k] += output[i][j][k];
             }
         }
     }
     // Weights
-    int k_size = dim_input - dim_output +1;
-    for (int h=0; h < depth_input; h++) {
-        for (int i=0; i < depth_output; i++) {
+    int k_size = input_width - output_width +1;
+    for (int h=0; h < input_depth; h++) {
+        for (int i=0; i < output_depth; i++) {
             for (int j=0; j < k_size; j++) {
                 for (int k=0; k < k_size; k++) {
                     float tmp = 0;
-                    for (int l=0; l < dim_output; l++) {
-                        for (int m=0; m < dim_output; m++) {
+                    for (int l=0; l < output_width; l++) {
+                        for (int m=0; m < output_width; m++) {
                             tmp += input[h][l+j][m+k]*output[i][l][m];
                         }
                     }
@@ -599,15 +599,15 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
     if (is_first==1) // Pas besoin de backpropager dans l'input
         return;
     int min_m, max_m, min_n, max_n;
-    for (int i=0; i < depth_input; i++) {
-        for (int j=0; j < dim_input; j++) {
-            for (int k=0; k < dim_input; k++) {
+    for (int i=0; i < input_depth; i++) {
+        for (int j=0; j < input_width; j++) {
+            for (int k=0; k < input_width; k++) {
                 float tmp = 0;
-                for (int l=0; l < depth_output; l++) {
+                for (int l=0; l < output_depth; l++) {
                     min_m = max(0, k_size-1-j);
-                    max_m = min(k_size, dim_input - j);
+                    max_m = min(k_size, input_width - j);
                     min_n = max(0, k_size-1-k);
-                    max_n = min(k_size, dim_input-k);
+                    max_n = min(k_size, input_width-k);
                     for (int m=min_m; m < max_m; m++) {
                         for (int n=min_n; n < max_n; n++) {
                             tmp += output[l][j-k_size+m+1][k-k_size+n+1]*ker->weights[i][l][m][n];
@@ -623,10 +623,10 @@ void backward_convolution_cpu(Kernel_cnn* ker, float*** input, float*** input_z,
 #ifdef __CUDACC__
 extern "C"
 #endif
-void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, int activation, int is_first) {
+void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first) {
 #ifndef __CUDACC__
-    backward_convolution_cpu(ker, input, input_z, output, depth_input, dim_input, depth_output, dim_output, activation, is_first);
+    backward_convolution_cpu(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first);
 #else
-    backward_convolution_device(ker, input, input_z, output, depth_input, dim_input, depth_output, dim_output, activation, is_first);
+    backward_convolution_device(ker, input, input_z, output, input_depth, input_width, output_depth, output_width, activation, is_first);
 #endif
 }

View File

@@ -12,27 +12,27 @@ int convolution_not_outside(int x, int y, int lower_bound, int upper_bound) {
     return !(x < lower_bound || y < lower_bound || x >= upper_bound || y>= upper_bound);
 }
-void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
+void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) {
     // c'est le kernel de input
-    // input[kernel->rows][kernel_k_size + output_dim-1][kernel_k_size + output_dim-1]
-    // output[kernel->columns][output_dim][output_dim]
+    // input[kernel->rows][kernel_k_size + output_width-1][kernel_k_size + output_width-1]
+    // output[kernel->columns][output_width][output_width]
     int k_columns = kernel->columns;
     int k_rows = kernel->rows;
     int max_move = kernel->k_size - padding;
-    int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride;
+    int input_width = output_width*stride - 2*padding + kernel->k_size - stride;
     float f;
     for (int i=0; i < k_columns; i++) { // filtre
-        for (int j=0; j < output_dim; j++) { // ligne de sortie
-            for (int k=0; k < output_dim; k++) { // colonne de sortie
+        for (int j=0; j < output_width; j++) { // ligne de sortie
+            for (int k=0; k < output_width; k++) { // colonne de sortie
                 f = kernel->bias[i][j][k];
                 for (int a=0; a < k_rows; a++) { // Canal de couleur
                     for (int b=-padding; b < max_move; b++) { // ligne du filtre
                         for (int c=-padding; c < max_move; c++) { // colonne du filtre
                             int x = (stride*j+b);
                             int y = (stride*k+c);
-                            if (convolution_not_outside(x, y, 0, input_dim)) {
+                            if (convolution_not_outside(x, y, 0, input_width)) {
                                 f += kernel->weights[a][i][b][c]*input[a][stride*j+b][stride*k+c];
                             }
                         }
@@ -46,15 +46,15 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
 #ifdef __CUDACC__
-__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
+__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) {
     // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
     int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
-    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
-    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_dim, k_size)
+    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_width, k_size)
+    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_width, k_size)
     int max_move = kernel->k_size - padding;
-    int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride;
-    if (idx >= kernel->columns || idy >= output_dim || idz >= output_dim) {
+    int input_width = output_width*stride - 2*padding + kernel->k_size - stride;
+    if (idx >= kernel->columns || idy >= output_width || idz >= output_width) {
         return;
     }
@@ -65,7 +65,7 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
             for (int c=-padding; c < max_move; c++) {
                 int idy_2 = idy*stride+b;
                 int idz_2 = idz*stride+c;
-                if (convolution_not_outside(idy_2, idz_2, 0, input_dim)) {
+                if (convolution_not_outside(idy_2, idz_2, 0, input_width)) {
                     f += kernel->weights[a][idx][b][c]*input[a][idy_2][idz_2];
                 }
             }
@@ -75,21 +75,21 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
     output[idx][idy][idz] = f;
 }
-void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
+void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) {
     // Make computation
-    dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
+    dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
-    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim, stride, padding);
+    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_width, stride, padding);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }
 #endif
-void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
+void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) {
 #ifndef __CUDACC__
-    make_convolution_cpu(kernel, input, output, output_dim, stride, padding);
+    make_convolution_cpu(kernel, input, output, output_width, stride, padding);
 #else
-    make_convolution_device(kernel, input, output, output_dim, stride, padding);
+    make_convolution_device(kernel, input, output, output_width, stride, padding);
 #endif
 }
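
For reference, the width bookkeeping in `make_convolution` and `add_convolution` is mutually consistent; a standalone arithmetic check with illustrative values (the numbers are not taken from the commit):

```c
#include <stdio.h>

int main(void) {
    int k_size = 5, stride = 1, padding = 0;
    int output_width = 28;
    // Relation used in make_convolution_cpu / make_convolution_kernel:
    int input_width = output_width*stride - 2*padding + k_size - stride;   // 28*1 - 0 + 5 - 1 = 32
    // Inverse relation used in add_convolution when building the network:
    int check = (2*padding + input_width - (k_size - stride)) / stride;    // (0 + 32 - 4) / 1 = 28
    printf("input_width=%d, recovered output_width=%d\n", input_width, check);
    return 0;
}
```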

View File

@@ -16,27 +16,27 @@ int convolution_not_outside(int x, int y, int lower_bound, int upper_bound) {
     return !(x < lower_bound || y < lower_bound || x >= upper_bound || y>= upper_bound);
 }
-void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
+void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) {
     // c'est le kernel de input
-    // input[kernel->rows][kernel_k_size + output_dim-1][kernel_k_size + output_dim-1]
-    // output[kernel->columns][output_dim][output_dim]
+    // input[kernel->rows][kernel_k_size + output_width-1][kernel_k_size + output_width-1]
+    // output[kernel->columns][output_width][output_width]
     int k_columns = kernel->columns;
     int k_rows = kernel->rows;
     int max_move = kernel->k_size - padding;
-    int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride;
+    int input_width = output_width*stride - 2*padding + kernel->k_size - stride;
     float f;
     for (int i=0; i < k_columns; i++) { // filtre
-        for (int j=0; j < output_dim; j++) { // ligne de sortie
-            for (int k=0; k < output_dim; k++) { // colonne de sortie
+        for (int j=0; j < output_width; j++) { // ligne de sortie
+            for (int k=0; k < output_width; k++) { // colonne de sortie
                 f = kernel->bias[i][j][k];
                 for (int a=0; a < k_rows; a++) { // Canal de couleur
                     for (int b=-padding; b < max_move; b++) { // ligne du filtre
                         for (int c=-padding; c < max_move; c++) { // colonne du filtre
                             int x = (stride*j+b);
                             int y = (stride*k+c);
-                            if (convolution_not_outside(x, y, 0, input_dim)) {
+                            if (convolution_not_outside(x, y, 0, input_width)) {
                                 f += kernel->weights[a][i][b][c]*input[a][stride*j+b][stride*k+c];
                             }
                         }
@@ -50,15 +50,15 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
 #ifdef __CUDACC__
-__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
+__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) {
     // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
     int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
-    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
-    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_dim, k_size)
+    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_width, k_size)
+    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_width, k_size)
     int max_move = kernel->k_size - padding;
-    int input_dim = output_dim*stride - 2*padding + kernel->k_size - stride;
-    if (idx >= kernel->columns || idy >= output_dim || idz >= output_dim) {
+    int input_width = output_width*stride - 2*padding + kernel->k_size - stride;
+    if (idx >= kernel->columns || idy >= output_width || idz >= output_width) {
         return;
     }
@@ -69,7 +69,7 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
             for (int c=-padding; c < max_move; c++) {
                 int idy_2 = idy*stride+b;
                 int idz_2 = idz*stride+c;
-                if (convolution_not_outside(idy_2, idz_2, 0, input_dim)) {
+                if (convolution_not_outside(idy_2, idz_2, 0, input_width)) {
                     f += kernel->weights[a][idx][b][c]*input[a][idy_2][idz_2];
                 }
             }
@@ -79,21 +79,21 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
     output[idx][idy][idz] = f;
 }
-void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
+void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) {
     // Make computation
-    dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
+    dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
-    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim, stride, padding);
+    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_width, stride, padding);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }
 #endif
-void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding) {
+void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) {
 #ifndef __CUDACC__
-    make_convolution_cpu(kernel, input, output, output_dim, stride, padding);
+    make_convolution_cpu(kernel, input, output, output_width, stride, padding);
 #else
-    make_convolution_device(kernel, input, output, output_dim, stride, padding);
+    make_convolution_device(kernel, input, output, output_width, stride, padding);
 #endif
 }

View File

@@ -9,7 +9,7 @@
 #include "include/creation.h"
-Network* create_network(int max_size, float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth) {
+Network* create_network(int max_size, float learning_rate, int dropout, int activation, int initialisation, int input_width, int input_depth) {
     if (dropout < 0 || dropout > 100) {
         printf_error("La probabilité de dropout n'est pas respecté, elle doit être comprise entre 0 et 100\n");
     }
@@ -29,17 +29,17 @@ Network* create_network(int max_size, float learning_rate, int dropout, int acti
     }
     network->kernel[0]->linearisation = DOESNT_LINEARISE;
     network->kernel[0]->activation = activation;
-    network->width[0] = input_dim;
+    network->width[0] = input_width;
     network->depth[0] = input_depth;
     network->kernel[0]->nn = NULL;
     network->kernel[0]->cnn = NULL;
-    create_a_cube_input_layer(network, 0, input_depth, input_dim);
-    create_a_cube_input_z_layer(network, 0, input_depth, input_dim);
+    create_a_cube_input_layer(network, 0, input_depth, input_width);
+    create_a_cube_input_z_layer(network, 0, input_depth, input_width);
     return network;
 }
-Network* create_network_lenet5(float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth) {
-    Network* network = create_network(8, learning_rate, dropout, activation, initialisation, input_dim, input_depth);
+Network* create_network_lenet5(float learning_rate, int dropout, int activation, int initialisation, int input_width, int input_depth) {
+    Network* network = create_network(8, learning_rate, dropout, activation, initialisation, input_width, input_depth);
     add_convolution(network, 5, 6, 1, 0, activation);
     add_average_pooling(network, 2, 2, 0);
     add_convolution(network, 5, 16, 1, 0, activation);
@@ -50,8 +50,8 @@ Network* create_network_lenet5(float learning_rate, int dropout, int activation,
     return network;
 }
-Network* create_simple_one(float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth) {
-    Network* network = create_network(3, learning_rate, dropout, activation, initialisation, input_dim, input_depth);
+Network* create_simple_one(float learning_rate, int dropout, int activation, int initialisation, int input_width, int input_depth) {
+    Network* network = create_network(3, learning_rate, dropout, activation, initialisation, input_width, input_depth);
     add_dense_linearisation(network, 80, activation);
     add_dense(network, 10, SOFTMAX);
     return network;
@@ -104,8 +104,8 @@ void add_average_pooling(Network* network, int kernel_size, int stride, int padd
         printf_error("Impossible de rajouter une couche d'average pooling, le réseau est déjà plein\n");
         return;
     }
-    int dim_input = network->width[k_pos];
-    int dim_output = (2*padding + dim_input - (kernel_size - stride))/stride;
+    int input_width = network->width[k_pos];
+    int output_width = (2*padding + input_width - (kernel_size - stride))/stride;
     network->kernel[k_pos]->cnn = NULL;
     network->kernel[k_pos]->nn = NULL;
@@ -115,8 +115,8 @@ void add_average_pooling(Network* network, int kernel_size, int stride, int padd
     network->kernel[k_pos]->linearisation = DOESNT_LINEARISE;
     network->kernel[k_pos]->pooling = AVG_POOLING;
-    create_a_cube_input_layer(network, n, network->depth[n-1], dim_output);
-    create_a_cube_input_z_layer(network, n, network->depth[n-1], dim_output);
+    create_a_cube_input_layer(network, n, network->depth[n-1], output_width);
+    create_a_cube_input_z_layer(network, n, network->depth[n-1], output_width);
     network->size++;
 }
@@ -127,8 +127,8 @@ void add_max_pooling(Network* network, int kernel_size, int stride, int padding)
         printf_error("Impossible de rajouter une couche de max pooling, le réseau est déjà plein\n");
         return;
     }
-    int dim_input = network->width[k_pos];
-    int dim_output = (2*padding + dim_input - (kernel_size - stride))/stride;
+    int input_width = network->width[k_pos];
+    int output_width = (2*padding + input_width - (kernel_size - stride))/stride;
     network->kernel[k_pos]->cnn = NULL;
     network->kernel[k_pos]->nn = NULL;
@@ -136,8 +136,8 @@ void add_max_pooling(Network* network, int kernel_size, int stride, int padding)
     network->kernel[k_pos]->linearisation = DOESNT_LINEARISE;
     network->kernel[k_pos]->pooling = MAX_POOLING;
-    create_a_cube_input_layer(network, n, network->depth[n-1], dim_output);
-    create_a_cube_input_z_layer(network, n, network->depth[n-1], dim_output);
+    create_a_cube_input_layer(network, n, network->depth[n-1], output_width);
+    create_a_cube_input_z_layer(network, n, network->depth[n-1], output_width);
     network->size++;
 }
@@ -148,13 +148,13 @@ void add_convolution(Network* network, int kernel_size, int number_of_kernels, i
         printf_error("Impossible de rajouter une couche de convolution, le réseau est déjà plein \n");
         return;
     }
-    int depth_input = network->depth[k_pos];
-    int dim_input = network->width[k_pos];
-    int dim_output = (2*padding + dim_input - (kernel_size - stride))/stride;
-    int depth_output = number_of_kernels;
-    int bias_size = dim_output;
+    int input_depth = network->depth[k_pos];
+    int input_width = network->width[k_pos];
+    int output_width = (2*padding + input_width - (kernel_size - stride))/stride;
+    int output_depth = number_of_kernels;
+    int bias_size = output_width;
     network->kernel[k_pos]->nn = NULL;
     network->kernel[k_pos]->stride = stride;
@@ -166,23 +166,23 @@ void add_convolution(Network* network, int kernel_size, int number_of_kernels, i
     network->kernel[k_pos]->cnn = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn));
     Kernel_cnn* cnn = network->kernel[k_pos]->cnn;
     cnn->k_size = kernel_size;
-    cnn->rows = depth_input;
-    cnn->columns = depth_output;
-    cnn->weights = (float****)nalloc(depth_input, sizeof(float***));
-    cnn->d_weights = (float****)nalloc(depth_input, sizeof(float***));
+    cnn->rows = input_depth;
+    cnn->columns = output_depth;
+    cnn->weights = (float****)nalloc(input_depth, sizeof(float***));
+    cnn->d_weights = (float****)nalloc(input_depth, sizeof(float***));
     #ifdef ADAM_CNN_WEIGHTS
-    cnn->s_d_weights = (float****)nalloc(depth_input, sizeof(float***));
-    cnn->v_d_weights = (float****)nalloc(depth_input, sizeof(float***));
+    cnn->s_d_weights = (float****)nalloc(input_depth, sizeof(float***));
+    cnn->v_d_weights = (float****)nalloc(input_depth, sizeof(float***));
    #endif
-    for (int i=0; i < depth_input; i++) {
-        cnn->weights[i] = (float***)nalloc(depth_output, sizeof(float**));
-        cnn->d_weights[i] = (float***)nalloc(depth_output, sizeof(float**));
+    for (int i=0; i < input_depth; i++) {
+        cnn->weights[i] = (float***)nalloc(output_depth, sizeof(float**));
+        cnn->d_weights[i] = (float***)nalloc(output_depth, sizeof(float**));
         #ifdef ADAM_CNN_WEIGHTS
-        cnn->s_d_weights[i] = (float***)nalloc(depth_output, sizeof(float**));
-        cnn->v_d_weights[i] = (float***)nalloc(depth_output, sizeof(float**));
+        cnn->s_d_weights[i] = (float***)nalloc(output_depth, sizeof(float**));
+        cnn->v_d_weights[i] = (float***)nalloc(output_depth, sizeof(float**));
         #endif
-        for (int j=0; j < depth_output; j++) {
+        for (int j=0; j < output_depth; j++) {
             cnn->weights[i][j] = (float**)nalloc(kernel_size, sizeof(float*));
             cnn->d_weights[i][j] = (float**)nalloc(kernel_size, sizeof(float*));
             #ifdef ADAM_CNN_WEIGHTS
@@ -207,13 +207,13 @@ void add_convolution(Network* network, int kernel_size, int number_of_kernels, i
         }
     }
-    cnn->bias = (float***)nalloc(depth_output, sizeof(float**));
-    cnn->d_bias = (float***)nalloc(depth_output, sizeof(float**));
+    cnn->bias = (float***)nalloc(output_depth, sizeof(float**));
+    cnn->d_bias = (float***)nalloc(output_depth, sizeof(float**));
     #ifdef ADAM_CNN_BIAS
-    cnn->s_d_bias = (float***)nalloc(depth_output, sizeof(float**));
-    cnn->v_d_bias = (float***)nalloc(depth_output, sizeof(float**));
+    cnn->s_d_bias = (float***)nalloc(output_depth, sizeof(float**));
+    cnn->v_d_bias = (float***)nalloc(output_depth, sizeof(float**));
     #endif
-    for (int i=0; i < depth_output; i++) {
+    for (int i=0; i < output_depth; i++) {
         cnn->bias[i] = (float**)nalloc(bias_size, sizeof(float*));
         cnn->d_bias[i] = (float**)nalloc(bias_size, sizeof(float*));
         #ifdef ADAM_CNN_BIAS
@@ -239,10 +239,10 @@ void add_convolution(Network* network, int kernel_size, int number_of_kernels, i
     int n_in = network->width[n-1]*network->width[n-1]*network->depth[n-1];
     int n_out = network->width[n]*network->width[n]*network->depth[n];
-    initialisation_3d_matrix(network->initialisation, cnn->bias, depth_output, dim_output, dim_output, n_in, n_out);
-    initialisation_4d_matrix(network->initialisation, cnn->weights, depth_input, depth_output, kernel_size, kernel_size, n_in, n_out);
+    initialisation_3d_matrix(network->initialisation, cnn->bias, output_depth, output_width, output_width, n_in, n_out);
+    initialisation_4d_matrix(network->initialisation, cnn->weights, input_depth, output_depth, kernel_size, kernel_size, n_in, n_out);
create_a_cube_input_layer(network, n, depth_output, bias_size); create_a_cube_input_layer(network, n, output_depth, bias_size);
create_a_cube_input_z_layer(network, n, depth_output, bias_size); create_a_cube_input_z_layer(network, n, output_depth, bias_size);
network->size++; network->size++;
} }
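As a rough sanity check of the shapes allocated above (weights[input_depth][output_depth][kernel_size][kernel_size] and bias[output_depth][output_width][output_width]), here is a minimal sketch counting the learnable parameters of one such convolution layer; the sample sizes are arbitrary and not taken from the repository:

```c
#include <stdio.h>

int main(void) {
    // Arbitrary example sizes.
    int input_depth = 6, output_depth = 16, kernel_size = 5, output_width = 10;

    // Shapes as allocated in add_convolution above.
    long weights = (long)input_depth * output_depth * kernel_size * kernel_size;
    long bias    = (long)output_depth * output_width * output_width;

    printf("weights: %ld, bias: %ld, total: %ld\n", weights, bias, weights + bias);
    return 0;
}
```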


@ -59,7 +59,7 @@ extern "C"
/* /*
* Transfers the error information through a linearisation layer * Transfers the error information through a linearisation layer
*/ */
void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int depth_input, int dim_input, int size_output, int activation); void backward_linearisation(Kernel_nn* ker, float*** input, float*** input_z, float* output, int input_depth, int input_width, int size_output, int activation);
#ifdef __CUDACC__ #ifdef __CUDACC__
@ -68,6 +68,6 @@ extern "C"
/* /*
* Transfers the error information through a convolution layer * Transfers the error information through a convolution layer
*/ */
void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int depth_input, int dim_input, int depth_output, int dim_output, int activation, int is_first); void backward_convolution(Kernel_cnn* ker, float*** input, float*** input_z, float*** output, int input_depth, int input_width, int output_depth, int output_width, int activation, int is_first);
#endif #endif


@ -10,21 +10,21 @@ int convolution_not_outside(int x, int y, int lower_bound, int upper_bound);
/* /*
* Performs the convolution naively on the CPU * Performs the convolution naively on the CPU
*/ */
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding); void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding);
#ifdef __CUDACC__ #ifdef __CUDACC__
/* /*
* GPU kernel for the convolution * GPU kernel for the convolution
*/ */
__global__ void make_convolution_kernel(int k_size, int columns, int rows, float* bias, size_t pitch_bias, float**** weights, size_t pitch_weights, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_dim, int stride, int padding); __global__ void make_convolution_kernel(int k_size, int columns, int rows, float* bias, size_t pitch_bias, float**** weights, size_t pitch_weights, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_width, int stride, int padding);
/* /*
* Performs the convolution naively on the GPU * Performs the convolution naively on the GPU
*/ */
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding); void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding);
#endif #endif
/* /*
* Determines at compile time whether the convolution can be run on the GPU * Determines at compile time whether the convolution can be run on the GPU
*/ */
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding); void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding);
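make_convolution above is declared once and resolves its implementation at build time. A minimal standalone sketch of that dispatch pattern, with placeholder function names rather than the repository's:

```c
#include <stdio.h>

// Placeholder names; the repository uses make_convolution_cpu / make_convolution_device.
static void do_work_cpu(void)    { printf("CPU path\n"); }
#ifdef __CUDACC__
static void do_work_device(void) { printf("GPU path\n"); }
#endif

// Single public entry point: __CUDACC__ decides which path is compiled in.
void do_work(void) {
#ifndef __CUDACC__
    do_work_cpu();
#else
    do_work_device();
#endif
}

int main(void) {
    do_work();
    return 0;
}
```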


@ -7,17 +7,17 @@
/* /*
* Creates a network that can hold max_size layers (including the input and output layers) * Creates a network that can hold max_size layers (including the input and output layers)
*/ */
Network* create_network(int max_size, float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth); Network* create_network(int max_size, float learning_rate, int dropout, int activation, int initialisation, int input_width, int input_depth);
/* /*
* Returns a network following the LeNet5 architecture * Returns a network following the LeNet5 architecture
*/ */
Network* create_network_lenet5(float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth); Network* create_network_lenet5(float learning_rate, int dropout, int activation, int initialisation, int input_width, int input_depth);
/* /*
* Returns a network with no convolutions, similar to the one used in src/dense * Returns a network with no convolutions, similar to the one used in src/dense
*/ */
Network* create_simple_one(float learning_rate, int dropout, int activation, int initialisation, int input_dim, int input_depth); Network* create_simple_one(float learning_rate, int dropout, int activation, int initialisation, int input_width, int input_depth);
/* /*
* Creates and allocates memory for an input cube layer * Creates and allocates memory for an input cube layer
@ -49,7 +49,7 @@ void add_max_pooling(Network* network, int kernel_size, int stride, int padding)
/* /*
* Adds a convolution layer to the network with the chosen kernel size (kernel_size), * Adds a convolution layer to the network with the chosen kernel size (kernel_size),
* padding and stride. The depth of * padding and stride. The depth of
* the next layer is chosen with number_of_kernels (= depth_output) * the next layer is chosen with number_of_kernels (= output_depth)
* Then initialises the constructed weights and biases * Then initialises the constructed weights and biases
*/ */
void add_convolution(Network* network, int kernel_size, int number_of_kernels, int stride, int padding, int activation); void add_convolution(Network* network, int kernel_size, int number_of_kernels, int stride, int padding, int activation);
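A minimal usage sketch of the constructors declared above. It assumes the repository's creation header is included; the layer sizes are arbitrary example choices, and RELU / NORMALIZED_XAVIER are the constants used elsewhere in this commit:

```c
// Sketch only: assumes the repository's network creation header is included.
Network* build_tiny_network(void) {
    // max_size 4, learning rate 0.1, no dropout, 32x32x1 input (example values)
    Network* net = create_network(4, 0.1f, 0, RELU, NORMALIZED_XAVIER, 32, 1);
    add_convolution(net, 5, 6, 1, 0, RELU); // 32x32x1 -> 28x28x6 (5x5 kernel, stride 1, no padding)
    add_average_pooling(net, 2, 2, 0);      // 28x28x6 -> 14x14x6 (2x2 pooling, stride 2)
    return net;
}
```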


@ -14,12 +14,12 @@ int pooling_not_outside(int x, int y, int lower_bound, int upper_bound);
/* /*
* Performs the forward propagation of a convolution with the chosen stride and padding on the CPU * Performs the forward propagation of a convolution with the chosen stride and padding on the CPU
*/ */
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding); void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding);
/* /*
* Performs the forward propagation of a convolution with the chosen stride and padding on the CPU or GPU * Performs the forward propagation of a convolution with the chosen stride and padding on the CPU or GPU
*/ */
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride, int padding); void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding);
#ifdef __CUDACC__ #ifdef __CUDACC__
extern "C" extern "C"
@ -27,7 +27,7 @@ extern "C"
/* /*
* Performs the forward propagation of average pooling with the chosen stride and padding * Performs the forward propagation of average pooling with the chosen stride and padding
*/ */
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride, int padding); void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding);
#ifdef __CUDACC__ #ifdef __CUDACC__
extern "C" extern "C"
@ -35,7 +35,7 @@ extern "C"
/* /*
* Performs the forward propagation of max pooling with the chosen stride and padding * Performs the forward propagation of max pooling with the chosen stride and padding
*/ */
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride, int padding); void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride, int padding);
#ifdef __CUDACC__ #ifdef __CUDACC__
extern "C" extern "C"
@ -51,6 +51,6 @@ extern "C"
/* /*
* Performs the forward propagation of a dense layer that goes from a matrix to a vector * Performs the forward propagation of a dense layer that goes from a matrix to a vector
*/ */
void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output); void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int input_depth, int input_width, int size_output);
#endif #endif


@ -33,5 +33,5 @@ Network* read_network(char* filename);
/* /*
* Reads a kernel from the file specified by the pointer ptr * Reads a kernel from the file specified by the pointer ptr
*/ */
Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr); Kernel* read_kernel(int type_couche, int output_width, FILE* ptr);
#endif #endif


@ -6,7 +6,7 @@
/* /*
* Prints the kernel of a convolution layer * Prints the kernel of a convolution layer
*/ */
void print_kernel_cnn(Kernel_cnn* k, int depth_input, int dim_input, int depth_output, int dim_output); void print_kernel_cnn(Kernel_cnn* k, int input_depth, int input_width, int output_depth, int output_width);
/* /*
* Prints a pooling layer * Prints a pooling layer


@ -12,15 +12,15 @@
typedef struct Kernel_cnn { typedef struct Kernel_cnn {
// Kernel whose output is a matrix layer // Kernel whose output is a matrix layer
int k_size; // k_size = dim_input - dim_output + 1 int k_size; // k_size = input_width - output_width + 1
int rows; // Depth of the input int rows; // Depth of the input
int columns; // Depth of the output int columns; // Depth of the output
float*** bias; // bias[columns][dim_output][dim_output] <=> bias[depth output][dim output][dim output] float*** bias; // bias[columns][output_width][output_width] <=> bias[depth output][dim output][dim output]
float*** d_bias; // d_bias[columns][dim_output][dim_output] float*** d_bias; // d_bias[columns][output_width][output_width]
#ifdef ADAM_CNN_BIAS #ifdef ADAM_CNN_BIAS
float*** s_d_bias; // s_d_bias[columns][dim_output][dim_output] float*** s_d_bias; // s_d_bias[columns][output_width][output_width]
float*** v_d_bias; // v_d_bias[columns][dim_output][dim_output] float*** v_d_bias; // v_d_bias[columns][output_width][output_width]
#endif #endif
float**** weights; // weights[rows][columns][k_size][k_size] <=> weights[depth input][depth output][size kernel][size kernel] float**** weights; // weights[rows][columns][k_size][k_size] <=> weights[depth input][depth output][size kernel][size kernel]


@ -24,7 +24,7 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width
int max_move = size - padding; int max_move = size - padding;
int input_dim = output_width*stride - 2*padding + size - stride; int input_width = output_width*stride - 2*padding + size - stride;
if (idx >= output_depth || idy >= output_width || idz >= output_width) { if (idx >= output_depth || idy >= output_width || idz >= output_width) {
return; return;
@ -37,7 +37,7 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int idy_2 = stride*idy +a; int idy_2 = stride*idy +a;
int idz_2 = stride*idz +b; int idz_2 = stride*idz +b;
if (pooling_not_outside(idy_2, idz_2, 0, input_dim)) { if (pooling_not_outside(idy_2, idz_2, 0, input_width)) {
sum += input[idx][idy_2][idz_2]; sum += input[idx][idy_2][idz_2];
nb_elements++; nb_elements++;
} }
@ -61,7 +61,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
// input[output_depth][output_width+size-1][output_width+size-1] // input[output_depth][output_width+size-1][output_width+size-1]
// output[output_depth][output_width][output_width] // output[output_depth][output_width][output_width]
int max_move = size - padding; int max_move = size - padding;
int input_dim = output_width*stride - 2*padding + size - stride; int input_width = output_width*stride - 2*padding + size - stride;
for (int i=0; i < output_depth; i++) { for (int i=0; i < output_depth; i++) {
for (int j=0; j < output_width; j++) { for (int j=0; j < output_width; j++) {
@ -72,7 +72,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int j_2 = stride*j +a; int j_2 = stride*j +a;
int k_2 = stride*k +b; int k_2 = stride*k +b;
if (pooling_not_outside(j_2, k_2, 0, input_dim)) { if (pooling_not_outside(j_2, k_2, 0, input_width)) {
sum += input[i][j_2][k_2]; sum += input[i][j_2][k_2];
nb_elements++; nb_elements++;
} }
@ -108,7 +108,7 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width
int input_dim = output_width*stride - 2*padding + size - stride; int input_width = output_width*stride - 2*padding + size - stride;
if (idx >= output_depth || idy >= output_width || idz >= output_width) { if (idx >= output_depth || idy >= output_width || idz >= output_width) {
return; return;
@ -122,7 +122,7 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int idy_2 = stride*idy +a; int idy_2 = stride*idy +a;
int idz_2 = stride*idz +b; int idz_2 = stride*idz +b;
if (pooling_not_outside(idy_2, idz_2, 0, input_dim)) { if (pooling_not_outside(idy_2, idz_2, 0, input_width)) {
temp = input[idx][idy_2][idz_2]; temp = input[idx][idy_2][idz_2];
m = m > temp ? m : temp; // max(m, temp) m = m > temp ? m : temp; // max(m, temp)
} }
@ -146,7 +146,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
// input[output_depth][output_width+size-1][output_width+size-1] // input[output_depth][output_width+size-1][output_width+size-1]
// output[output_depth][output_width][output_width] // output[output_depth][output_width][output_width]
int max_move = size - padding; int max_move = size - padding;
int input_dim = output_width*stride - 2*padding + size - stride; int input_width = output_width*stride - 2*padding + size - stride;
float m; float m;
for (int i=0; i < output_depth; i++) { for (int i=0; i < output_depth; i++) {
for (int j=0; j < output_width; j++) { for (int j=0; j < output_width; j++) {
@ -156,7 +156,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int j_2 = stride*j +a; int j_2 = stride*j +a;
int k_2 = stride*k +b; int k_2 = stride*k +b;
if (pooling_not_outside(j_2, k_2, 0, input_dim)) { if (pooling_not_outside(j_2, k_2, 0, input_width)) {
m = fmaxf(m, input[i][j_2][k_2]); m = fmaxf(m, input[i][j_2][k_2]);
} }
} }
@ -248,7 +248,7 @@ void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input,
* Dense linearized * Dense linearized
*/ */
#ifdef __CUDACC__ #ifdef __CUDACC__
__global__ void make_dense_linearized_kernel(float** weights, float* bias, float*** input, float* output, int depth_input, int dim_input, int size_output) { __global__ void make_dense_linearized_kernel(float** weights, float* bias, float*** input, float* output, int input_depth, int input_width, int size_output) {
// Respective equivalents of i, j and k in the loop run by the CPU // Respective equivalents of i, j and k in the loop run by the CPU
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output
@ -257,38 +257,38 @@ __global__ void make_dense_linearized_kernel(float** weights, float* bias, float
} }
float f = bias[idx]; float f = bias[idx];
for (int i=0; i < depth_input; i++) { for (int i=0; i < input_depth; i++) {
for (int j=0; j < dim_input; j++) { for (int j=0; j < input_width; j++) {
for (int k=0; k < dim_input; k++) { for (int k=0; k < input_width; k++) {
f += input[i][j][k]*weights[k + j*dim_input + i*depth_input][idx]; f += input[i][j][k]*weights[k + j*input_width + i*input_depth][idx];
} }
} }
} }
output[idx] = f; output[idx] = f;
} }
void make_dense_linearized_device(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { void make_dense_linearized_device(Kernel_nn* kernel, float*** input, float* output, int input_depth, int input_width, int size_output) {
// Make computation // Make computation
dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1); dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1);
dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z); dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z);
make_dense_linearized_kernel<<<gridSize, blockSize>>>(kernel->weights, kernel->bias, input, output, depth_input, dim_input, size_output); make_dense_linearized_kernel<<<gridSize, blockSize>>>(kernel->weights, kernel->bias, input, output, input_depth, input_width, size_output);
gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() ); gpuErrchk( cudaDeviceSynchronize() );
} }
#endif #endif
void make_dense_linearized_cpu(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { void make_dense_linearized_cpu(Kernel_nn* kernel, float*** input, float* output, int input_depth, int input_width, int size_output) {
// input[depth_input][dim_input][dim_input] // input[input_depth][input_width][input_width]
// output[size_output] // output[size_output]
float f; float f;
for (int l=0; l < size_output; l++) { for (int l=0; l < size_output; l++) {
f = kernel->bias[l]; f = kernel->bias[l];
for (int i=0; i < depth_input; i++) { for (int i=0; i < input_depth; i++) {
for (int j=0; j < dim_input; j++) { for (int j=0; j < input_width; j++) {
for (int k=0; k < dim_input; k++) { for (int k=0; k < input_width; k++) {
f += input[i][j][k]*kernel->weights[k + j*dim_input + i*depth_input][l]; f += input[i][j][k]*kernel->weights[k + j*input_width + i*input_depth][l];
} }
} }
} }
@ -299,10 +299,10 @@ void make_dense_linearized_cpu(Kernel_nn* kernel, float*** input, float* output,
#ifdef __CUDACC__ #ifdef __CUDACC__
extern "C" extern "C"
#endif #endif
void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int input_depth, int input_width, int size_output) {
#ifndef __CUDACC__ #ifndef __CUDACC__
make_dense_linearized_cpu(kernel, input, output, depth_input, dim_input, size_output); make_dense_linearized_cpu(kernel, input, output, input_depth, input_width, size_output);
#else #else
make_dense_linearized_device(kernel, input, output, depth_input, dim_input, size_output); make_dense_linearized_device(kernel, input, output, input_depth, input_width, size_output);
#endif #endif
} }
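A standalone sketch of the computation make_dense_linearized_cpu performs above, using fixed-size plain arrays instead of the repository's Kernel_nn type and a standard row-major flattening of the input cube; all sizes and values here are arbitrary:

```c
#include <stdio.h>

#define DEPTH 2
#define WIDTH 3
#define SIZE_OUTPUT 4

// The input cube input[DEPTH][WIDTH][WIDTH] is flattened and multiplied by a
// weights matrix of shape [DEPTH*WIDTH*WIDTH][SIZE_OUTPUT], plus a bias.
int main(void) {
    float input[DEPTH][WIDTH][WIDTH];
    float weights[DEPTH*WIDTH*WIDTH][SIZE_OUTPUT];
    float bias[SIZE_OUTPUT] = {0.f, 0.f, 0.f, 0.f};
    float output[SIZE_OUTPUT];

    // Simple deterministic fill.
    for (int i = 0; i < DEPTH; i++)
        for (int j = 0; j < WIDTH; j++)
            for (int k = 0; k < WIDTH; k++)
                input[i][j][k] = 1.f;
    for (int n = 0; n < DEPTH*WIDTH*WIDTH; n++)
        for (int l = 0; l < SIZE_OUTPUT; l++)
            weights[n][l] = 0.5f;

    for (int l = 0; l < SIZE_OUTPUT; l++) {
        float f = bias[l];
        for (int i = 0; i < DEPTH; i++)
            for (int j = 0; j < WIDTH; j++)
                for (int k = 0; k < WIDTH; k++)
                    f += input[i][j][k] * weights[(i*WIDTH + j)*WIDTH + k][l]; // row-major flattening
        output[l] = f;
    }

    printf("output[0] = %f\n", output[0]); // 2*3*3*0.5 = 9.0
    return 0;
}
```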


@ -24,7 +24,7 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width
int max_move = size - padding; int max_move = size - padding;
int input_dim = output_width*stride - 2*padding + size - stride; int input_width = output_width*stride - 2*padding + size - stride;
if (idx >= output_depth || idy >= output_width || idz >= output_width) { if (idx >= output_depth || idy >= output_width || idz >= output_width) {
return; return;
@ -37,7 +37,7 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int idy_2 = stride*idy +a; int idy_2 = stride*idy +a;
int idz_2 = stride*idz +b; int idz_2 = stride*idz +b;
if (pooling_not_outside(idy_2, idz_2, 0, input_dim)) { if (pooling_not_outside(idy_2, idz_2, 0, input_width)) {
sum += input[idx][idy_2][idz_2]; sum += input[idx][idy_2][idz_2];
nb_elements++; nb_elements++;
} }
@ -61,7 +61,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
// input[output_depth][output_width+size-1][output_width+size-1] // input[output_depth][output_width+size-1][output_width+size-1]
// output[output_depth][output_width][output_width] // output[output_depth][output_width][output_width]
int max_move = size - padding; int max_move = size - padding;
int input_dim = output_width*stride - 2*padding + size - stride; int input_width = output_width*stride - 2*padding + size - stride;
for (int i=0; i < output_depth; i++) { for (int i=0; i < output_depth; i++) {
for (int j=0; j < output_width; j++) { for (int j=0; j < output_width; j++) {
@ -72,7 +72,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int j_2 = stride*j +a; int j_2 = stride*j +a;
int k_2 = stride*k +b; int k_2 = stride*k +b;
if (pooling_not_outside(j_2, k_2, 0, input_dim)) { if (pooling_not_outside(j_2, k_2, 0, input_width)) {
sum += input[i][j_2][k_2]; sum += input[i][j_2][k_2];
nb_elements++; nb_elements++;
} }
@ -108,7 +108,7 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width
int input_dim = output_width*stride - 2*padding + size - stride; int input_width = output_width*stride - 2*padding + size - stride;
if (idx >= output_depth || idy >= output_width || idz >= output_width) { if (idx >= output_depth || idy >= output_width || idz >= output_width) {
return; return;
@ -122,7 +122,7 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int idy_2 = stride*idy +a; int idy_2 = stride*idy +a;
int idz_2 = stride*idz +b; int idz_2 = stride*idz +b;
if (pooling_not_outside(idy_2, idz_2, 0, input_dim)) { if (pooling_not_outside(idy_2, idz_2, 0, input_width)) {
temp = input[idx][idy_2][idz_2]; temp = input[idx][idy_2][idz_2];
m = m > temp ? m : temp; // max(m, temp) m = m > temp ? m : temp; // max(m, temp)
} }
@ -146,7 +146,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
// input[output_depth][output_width+size-1][output_width+size-1] // input[output_depth][output_width+size-1][output_width+size-1]
// output[output_depth][output_width][output_width] // output[output_depth][output_width][output_width]
int max_move = size - padding; int max_move = size - padding;
int input_dim = output_width*stride - 2*padding + size - stride; int input_width = output_width*stride - 2*padding + size - stride;
float m; float m;
for (int i=0; i < output_depth; i++) { for (int i=0; i < output_depth; i++) {
for (int j=0; j < output_width; j++) { for (int j=0; j < output_width; j++) {
@ -156,7 +156,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
for (int b=-padding; b < max_move; b++) { for (int b=-padding; b < max_move; b++) {
int j_2 = stride*j +a; int j_2 = stride*j +a;
int k_2 = stride*k +b; int k_2 = stride*k +b;
if (pooling_not_outside(j_2, k_2, 0, input_dim)) { if (pooling_not_outside(j_2, k_2, 0, input_width)) {
m = fmaxf(m, input[i][j_2][k_2]); m = fmaxf(m, input[i][j_2][k_2]);
} }
} }
@ -248,7 +248,7 @@ void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input,
* Dense linearized * Dense linearized
*/ */
#ifdef __CUDACC__ #ifdef __CUDACC__
__global__ void make_dense_linearized_kernel(float** weights, float* bias, float*** input, float* output, int depth_input, int dim_input, int size_output) { __global__ void make_dense_linearized_kernel(float** weights, float* bias, float*** input, float* output, int input_depth, int input_width, int size_output) {
// Respective equivalents of i, j and k in the loop run by the CPU // Respective equivalents of i, j and k in the loop run by the CPU
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output
@ -257,38 +257,38 @@ __global__ void make_dense_linearized_kernel(float** weights, float* bias, float
} }
float f = bias[idx]; float f = bias[idx];
for (int i=0; i < depth_input; i++) { for (int i=0; i < input_depth; i++) {
for (int j=0; j < dim_input; j++) { for (int j=0; j < input_width; j++) {
for (int k=0; k < dim_input; k++) { for (int k=0; k < input_width; k++) {
f += input[i][j][k]*weights[k + j*dim_input + i*depth_input][idx]; f += input[i][j][k]*weights[k + j*input_width + i*input_depth][idx];
} }
} }
} }
output[idx] = f; output[idx] = f;
} }
void make_dense_linearized_device(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { void make_dense_linearized_device(Kernel_nn* kernel, float*** input, float* output, int input_depth, int input_width, int size_output) {
// Make computation // Make computation
dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1); dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1);
dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z); dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z);
make_dense_linearized_kernel<<<gridSize, blockSize>>>(kernel->weights, kernel->bias, input, output, depth_input, dim_input, size_output); make_dense_linearized_kernel<<<gridSize, blockSize>>>(kernel->weights, kernel->bias, input, output, input_depth, input_width, size_output);
gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() ); gpuErrchk( cudaDeviceSynchronize() );
} }
#endif #endif
void make_dense_linearized_cpu(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { void make_dense_linearized_cpu(Kernel_nn* kernel, float*** input, float* output, int input_depth, int input_width, int size_output) {
// input[depth_input][dim_input][dim_input] // input[input_depth][input_width][input_width]
// output[size_output] // output[size_output]
float f; float f;
for (int l=0; l < size_output; l++) { for (int l=0; l < size_output; l++) {
f = kernel->bias[l]; f = kernel->bias[l];
for (int i=0; i < depth_input; i++) { for (int i=0; i < input_depth; i++) {
for (int j=0; j < dim_input; j++) { for (int j=0; j < input_width; j++) {
for (int k=0; k < dim_input; k++) { for (int k=0; k < input_width; k++) {
f += input[i][j][k]*kernel->weights[k + j*dim_input + i*depth_input][l]; f += input[i][j][k]*kernel->weights[k + j*input_width + i*input_depth][l];
} }
} }
} }
@ -299,10 +299,10 @@ void make_dense_linearized_cpu(Kernel_nn* kernel, float*** input, float* output,
#ifdef __CUDACC__ #ifdef __CUDACC__
extern "C" extern "C"
#endif #endif
void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int input_depth, int input_width, int size_output) {
#ifndef __CUDACC__ #ifndef __CUDACC__
make_dense_linearized_cpu(kernel, input, output, depth_input, dim_input, size_output); make_dense_linearized_cpu(kernel, input, output, input_depth, input_width, size_output);
#else #else
make_dense_linearized_device(kernel, input, output, depth_input, dim_input, size_output); make_dense_linearized_device(kernel, input, output, input_depth, input_width, size_output);
#endif #endif
} }
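A standalone sketch of the average-pooling loops shown above, reduced to a single channel; the explicit bounds test plays the role of pooling_not_outside, and the sizes are arbitrary example values:

```c
#include <stdio.h>

#define IN_W 4

int main(void) {
    float input[IN_W][IN_W];
    int size = 2, stride = 2, padding = 0;
    int output_width = (2*padding + IN_W - (size - stride)) / stride; // = 2

    for (int i = 0; i < IN_W; i++)
        for (int j = 0; j < IN_W; j++)
            input[i][j] = (float)(i*IN_W + j);

    for (int j = 0; j < output_width; j++) {
        for (int k = 0; k < output_width; k++) {
            float sum = 0.f;
            int nb_elements = 0;
            for (int a = -padding; a < size - padding; a++) {
                for (int b = -padding; b < size - padding; b++) {
                    int j_2 = stride*j + a;
                    int k_2 = stride*k + b;
                    if (j_2 >= 0 && j_2 < IN_W && k_2 >= 0 && k_2 < IN_W) { // pooling_not_outside
                        sum += input[j_2][k_2];
                        nb_elements++;
                    }
                }
            }
            printf("%6.2f ", sum / nb_elements);
        }
        printf("\n");
    }
    return 0;
}
```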


@ -73,7 +73,7 @@ void write_couche(Network* network, int indice_couche, int type_couche, FILE* pt
int indice_buffer = 0; int indice_buffer = 0;
if (type_couche == 0) { // CNN case if (type_couche == 0) { // CNN case
Kernel_cnn* cnn = kernel->cnn; Kernel_cnn* cnn = kernel->cnn;
int output_dim = network->width[indice_couche+1]; int output_width = network->width[indice_couche+1];
// Writing the pre-body // Writing the pre-body
uint32_t pre_buffer[7]; uint32_t pre_buffer[7];
@ -90,9 +90,9 @@ void write_couche(Network* network, int indice_couche, int type_couche, FILE* pt
// We need to split the data into small buffers to keep some memory free // We need to split the data into small buffers to keep some memory free
for (int i=0; i < cnn->columns; i++) { for (int i=0; i < cnn->columns; i++) {
indice_buffer = 0; indice_buffer = 0;
float buffer[output_dim*output_dim]; float buffer[output_width*output_width];
for (int j=0; j < output_dim; j++) { for (int j=0; j < output_width; j++) {
for (int k=0; k < output_dim; k++) { for (int k=0; k < output_width; k++) {
bufferAdd(cnn->bias[i][j][k]); bufferAdd(cnn->bias[i][j][k]);
} }
} }
@ -234,7 +234,7 @@ Network* read_network(char* filename) {
return network; return network;
} }
Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) { Kernel* read_kernel(int type_couche, int output_width, FILE* ptr) {
Kernel* kernel = (Kernel*)nalloc(1, sizeof(Kernel)); Kernel* kernel = (Kernel*)nalloc(1, sizeof(Kernel));
if (type_couche == CNN) { // CNN case if (type_couche == CNN) { // CNN case
// Reading the "pre-body" // Reading the "pre-body"
@ -262,20 +262,20 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) {
cnn->v_d_bias = (float***)nalloc(cnn->columns, sizeof(float**)); cnn->v_d_bias = (float***)nalloc(cnn->columns, sizeof(float**));
#endif #endif
for (int i=0; i < cnn->columns; i++) { for (int i=0; i < cnn->columns; i++) {
cnn->bias[i] = (float**)nalloc(output_dim, sizeof(float*)); cnn->bias[i] = (float**)nalloc(output_width, sizeof(float*));
cnn->d_bias[i] = (float**)nalloc(output_dim, sizeof(float*)); cnn->d_bias[i] = (float**)nalloc(output_width, sizeof(float*));
#ifdef ADAM_CNN_BIAS #ifdef ADAM_CNN_BIAS
cnn->s_d_bias[i] = (float**)nalloc(output_dim, sizeof(float*)); cnn->s_d_bias[i] = (float**)nalloc(output_width, sizeof(float*));
cnn->v_d_bias[i] = (float**)nalloc(output_dim, sizeof(float*)); cnn->v_d_bias[i] = (float**)nalloc(output_width, sizeof(float*));
#endif #endif
for (int j=0; j < output_dim; j++) { for (int j=0; j < output_width; j++) {
cnn->bias[i][j] = (float*)nalloc(output_dim, sizeof(float)); cnn->bias[i][j] = (float*)nalloc(output_width, sizeof(float));
cnn->d_bias[i][j] = (float*)nalloc(output_dim, sizeof(float)); cnn->d_bias[i][j] = (float*)nalloc(output_width, sizeof(float));
#ifdef ADAM_CNN_BIAS #ifdef ADAM_CNN_BIAS
cnn->s_d_bias[i][j] = (float*)nalloc(output_dim, sizeof(float)); cnn->s_d_bias[i][j] = (float*)nalloc(output_width, sizeof(float));
cnn->v_d_bias[i][j] = (float*)nalloc(output_dim, sizeof(float)); cnn->v_d_bias[i][j] = (float*)nalloc(output_width, sizeof(float));
#endif #endif
for (int k=0; k < output_dim; k++) { for (int k=0; k < output_width; k++) {
(void) !fread(&tmp, sizeof(tmp), 1, ptr); (void) !fread(&tmp, sizeof(tmp), 1, ptr);
cnn->bias[i][j][k] = tmp; cnn->bias[i][j][k] = tmp;
cnn->d_bias[i][j][k] = 0.; cnn->d_bias[i][j][k] = 0.;
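A standalone sketch of the round trip that write_couche and read_kernel perform for one bias channel: pack an output_width x output_width block into a flat buffer, fwrite it, then fread it back value by value. The file name and values are arbitrary:

```c
#include <stdio.h>

int main(void) {
    int output_width = 3;
    float buffer[output_width*output_width]; // same VLA idea as in write_couche

    for (int j = 0; j < output_width; j++)
        for (int k = 0; k < output_width; k++)
            buffer[j*output_width + k] = 0.01f * (j*output_width + k);

    FILE* ptr = fopen("bias_block.bin", "wb");
    if (!ptr) return 1;
    fwrite(buffer, sizeof(float), output_width*output_width, ptr);
    fclose(ptr);

    ptr = fopen("bias_block.bin", "rb");
    if (!ptr) return 1;
    float tmp;
    for (int j = 0; j < output_width; j++) {
        for (int k = 0; k < output_width; k++) {
            (void) !fread(&tmp, sizeof(tmp), 1, ptr); // same read idiom as read_kernel
            printf("%.2f ", tmp);
        }
        printf("\n");
    }
    fclose(ptr);
    return 0;
}
```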


@ -11,13 +11,13 @@
#define purple printf("\033[0;35m") #define purple printf("\033[0;35m")
#define reset_color printf("\033[0m") #define reset_color printf("\033[0m")
void print_kernel_cnn(Kernel_cnn* ker, int depth_input, int dim_input, int depth_output, int dim_output) { void print_kernel_cnn(Kernel_cnn* ker, int input_depth, int input_width, int output_depth, int output_width) {
int k_size = dim_input - dim_output + 1; int k_size = input_width - output_width + 1;
// print bias // print bias
green; green;
for (int i=0; i<depth_output; i++) { for (int i=0; i<output_depth; i++) {
for (int j=0; j<dim_output; j++) { for (int j=0; j<output_width; j++) {
for (int k=0; k<dim_output; k++) { for (int k=0; k<output_width; k++) {
printf("%.2f", ker->bias[i][j][k]); printf("%.2f", ker->bias[i][j][k]);
} }
print_space; print_space;
@ -29,9 +29,9 @@ void print_kernel_cnn(Kernel_cnn* ker, int depth_input, int dim_input, int depth
//print weights //print weights
red; red;
for (int i=0; i<depth_input; i++) { for (int i=0; i<input_depth; i++) {
printf("------Line %d-----\n", i); printf("------Line %d-----\n", i);
for (int j=0; j<depth_output; j++) { for (int j=0; j<output_depth; j++) {
for (int k=0; k<k_size; k++) { for (int k=0; k<k_size; k++) {
for (int l=0; l<k_size; l++) { for (int l=0; l<k_size; l++) {
printf("%.2f", ker->weights[i][j][k][l]); printf("%.2f", ker->weights[i][j][k][l]);


@ -153,7 +153,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di
//* Loading the dataset //* Loading the dataset
int input_dim = -1; int input_width = -1;
int input_depth = -1; int input_depth = -1;
int nb_images_total; // Total number of images int nb_images_total; // Total number of images
@ -172,11 +172,11 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di
images = read_mnist_images(images_file); images = read_mnist_images(images_file);
labels = read_mnist_labels(labels_file); labels = read_mnist_labels(labels_file);
input_dim = 32; input_width = 32;
input_depth = 1; input_depth = 1;
} else { // Type JPG } else { // Type JPG
dataset = loadJpegDataset(data_dir); dataset = loadJpegDataset(data_dir);
input_dim = dataset->height + 4; // image_size + padding input_width = dataset->height + 4; // image_size + padding
input_depth = dataset->numComponents; input_depth = dataset->numComponents;
nb_images_total = dataset->numImages; nb_images_total = dataset->numImages;
@ -185,8 +185,8 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di
//* Creating the network //* Creating the network
Network* network; Network* network;
if (!recover) { if (!recover) {
network = create_network_lenet5(LEARNING_RATE, 0, RELU, NORMALIZED_XAVIER, input_dim, input_depth); network = create_network_lenet5(LEARNING_RATE, 0, RELU, NORMALIZED_XAVIER, input_width, input_depth);
//network = create_simple_one(LEARNING_RATE, 0, RELU, GLOROT, input_dim, input_depth); //network = create_simple_one(LEARNING_RATE, 0, RELU, GLOROT, input_width, input_depth);
} else { } else {
network = read_network(recover); network = read_network(recover);
network->learning_rate = LEARNING_RATE; network->learning_rate = LEARNING_RATE;


@ -33,7 +33,7 @@ void knuth_shuffle(int* tab, int n) {
} }
bool equals_networks(Network* network1, Network* network2) { bool equals_networks(Network* network1, Network* network2) {
int output_dim; int output_width;
checkEquals(size, "size", -1); checkEquals(size, "size", -1);
checkEquals(initialisation, "initialisation", -1); checkEquals(initialisation, "initialisation", -1);
checkEquals(dropout, "dropout", -1); checkEquals(dropout, "dropout", -1);
@ -70,13 +70,13 @@ bool equals_networks(Network* network1, Network* network2) {
} }
} else { } else {
// Type CNN // Type CNN
output_dim = network1->width[i+1]; output_width = network1->width[i+1];
checkEquals(kernel[i]->cnn->k_size, "kernel[i]->k_size", i); checkEquals(kernel[i]->cnn->k_size, "kernel[i]->k_size", i);
checkEquals(kernel[i]->cnn->rows, "kernel[i]->rows", i); checkEquals(kernel[i]->cnn->rows, "kernel[i]->rows", i);
checkEquals(kernel[i]->cnn->columns, "kernel[i]->columns", i); checkEquals(kernel[i]->cnn->columns, "kernel[i]->columns", i);
for (int j=0; j < network1->kernel[i]->cnn->columns; j++) { for (int j=0; j < network1->kernel[i]->cnn->columns; j++) {
for (int k=0; k < output_dim; k++) { for (int k=0; k < output_width; k++) {
for (int l=0; l < output_dim; l++) { for (int l=0; l < output_width; l++) {
checkEquals(kernel[i]->cnn->bias[j][k][l], "kernel[i]->cnn->bias[j][k][l]", l); checkEquals(kernel[i]->cnn->bias[j][k][l], "kernel[i]->cnn->bias[j][k][l]", l);
} }
} }
@ -108,7 +108,7 @@ Network* copy_network(Network* network) {
int rows; int rows;
int k_size; int k_size;
int columns; int columns;
int output_dim; int output_width;
copyVar(dropout); copyVar(dropout);
copyVar(learning_rate); copyVar(learning_rate);
@ -200,7 +200,7 @@ Network* copy_network(Network* network) {
rows = network->kernel[i]->cnn->rows; rows = network->kernel[i]->cnn->rows;
k_size = network->kernel[i]->cnn->k_size; k_size = network->kernel[i]->cnn->k_size;
columns = network->kernel[i]->cnn->columns; columns = network->kernel[i]->cnn->columns;
output_dim = network->width[i+1]; output_width = network->width[i+1];
network_cp->kernel[i]->nn = NULL; network_cp->kernel[i]->nn = NULL;
@ -217,20 +217,20 @@ Network* copy_network(Network* network) {
network_cp->kernel[i]->cnn->v_d_bias = (float***)nalloc(columns, sizeof(float**)); network_cp->kernel[i]->cnn->v_d_bias = (float***)nalloc(columns, sizeof(float**));
#endif #endif
for (int j=0; j < columns; j++) { for (int j=0; j < columns; j++) {
network_cp->kernel[i]->cnn->bias[j] = (float**)nalloc(output_dim, sizeof(float*)); network_cp->kernel[i]->cnn->bias[j] = (float**)nalloc(output_width, sizeof(float*));
network_cp->kernel[i]->cnn->d_bias[j] = (float**)nalloc(output_dim, sizeof(float*)); network_cp->kernel[i]->cnn->d_bias[j] = (float**)nalloc(output_width, sizeof(float*));
#ifdef ADAM_CNN_BIAS #ifdef ADAM_CNN_BIAS
network_cp->kernel[i]->cnn->s_d_bias[j] = (float**)nalloc(output_dim, sizeof(float*)); network_cp->kernel[i]->cnn->s_d_bias[j] = (float**)nalloc(output_width, sizeof(float*));
network_cp->kernel[i]->cnn->v_d_bias[j] = (float**)nalloc(output_dim, sizeof(float*)); network_cp->kernel[i]->cnn->v_d_bias[j] = (float**)nalloc(output_width, sizeof(float*));
#endif #endif
for (int k=0; k < output_dim; k++) { for (int k=0; k < output_width; k++) {
network_cp->kernel[i]->cnn->bias[j][k] = (float*)nalloc(output_dim, sizeof(float)); network_cp->kernel[i]->cnn->bias[j][k] = (float*)nalloc(output_width, sizeof(float));
network_cp->kernel[i]->cnn->d_bias[j][k] = (float*)nalloc(output_dim, sizeof(float)); network_cp->kernel[i]->cnn->d_bias[j][k] = (float*)nalloc(output_width, sizeof(float));
#ifdef ADAM_CNN_BIAS #ifdef ADAM_CNN_BIAS
network_cp->kernel[i]->cnn->s_d_bias[j][k] = (float*)nalloc(output_dim, sizeof(float)); network_cp->kernel[i]->cnn->s_d_bias[j][k] = (float*)nalloc(output_width, sizeof(float));
network_cp->kernel[i]->cnn->v_d_bias[j][k] = (float*)nalloc(output_dim, sizeof(float)); network_cp->kernel[i]->cnn->v_d_bias[j][k] = (float*)nalloc(output_width, sizeof(float));
#endif #endif
for (int l=0; l < output_dim; l++) { for (int l=0; l < output_width; l++) {
copyVar(kernel[i]->cnn->bias[j][k][l]); copyVar(kernel[i]->cnn->bias[j][k][l]);
network_cp->kernel[i]->cnn->d_bias[j][k][l] = 0.; network_cp->kernel[i]->cnn->d_bias[j][k][l] = 0.;
#ifdef ADAM_CNN_BIAS #ifdef ADAM_CNN_BIAS
@ -324,7 +324,7 @@ void copy_network_parameters(Network* network_src, Network* network_dest) {
int rows; int rows;
int k_size; int k_size;
int columns; int columns;
int output_dim; int output_width;
copyVarParams(learning_rate); copyVarParams(learning_rate);
@ -348,11 +348,11 @@ void copy_network_parameters(Network* network_src, Network* network_dest) {
rows = network_src->kernel[i]->cnn->rows; rows = network_src->kernel[i]->cnn->rows;
k_size = network_src->kernel[i]->cnn->k_size; k_size = network_src->kernel[i]->cnn->k_size;
columns = network_src->kernel[i]->cnn->columns; columns = network_src->kernel[i]->cnn->columns;
output_dim = network_src->width[i+1]; output_width = network_src->width[i+1];
for (int j=0; j < columns; j++) { for (int j=0; j < columns; j++) {
for (int k=0; k < output_dim; k++) { for (int k=0; k < output_width; k++) {
for (int l=0; l < output_dim; l++) { for (int l=0; l < output_width; l++) {
copyVarParams(kernel[i]->cnn->bias[j][k][l]); copyVarParams(kernel[i]->cnn->bias[j][k][l]);
} }
} }
@ -385,7 +385,7 @@ int count_null_weights(Network* network) {
int rows; int rows;
int k_size; int k_size;
int columns; int columns;
int output_dim; int output_width;
for (int i=0; i < size-1; i++) { for (int i=0; i < size-1; i++) {
if (!network->kernel[i]->cnn && network->kernel[i]->nn) { // NN case if (!network->kernel[i]->cnn && network->kernel[i]->nn) { // NN case
@ -407,11 +407,11 @@ int count_null_weights(Network* network) {
rows = network->kernel[i]->cnn->rows; rows = network->kernel[i]->cnn->rows;
k_size = network->kernel[i]->cnn->k_size; k_size = network->kernel[i]->cnn->k_size;
columns = network->kernel[i]->cnn->columns; columns = network->kernel[i]->cnn->columns;
output_dim = network->width[i+1]; output_width = network->width[i+1];
for (int j=0; j < columns; j++) { for (int j=0; j < columns; j++) {
for (int k=0; k < output_dim; k++) { for (int k=0; k < output_width; k++) {
for (int l=0; l < output_dim; l++) { for (int l=0; l < output_width; l++) {
null_bias += fabs(network->kernel[i]->cnn->bias[j][k][l]) <= epsilon; null_bias += fabs(network->kernel[i]->cnn->bias[j][k][l]) <= epsilon;
} }
} }
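count_null_weights above treats an entry as null when its magnitude is at most epsilon. A standalone sketch of that counting idea on a small bias array; the threshold and sample values are arbitrary:

```c
#include <math.h>
#include <stdio.h>

int main(void) {
    float epsilon = 1e-6f;
    float bias[2][3] = {{0.f, 0.5f, -1e-8f}, {2e-6f, 0.f, -0.25f}};
    int null_bias = 0;

    // An entry counts as "null" when |x| <= epsilon.
    for (int j = 0; j < 2; j++)
        for (int k = 0; k < 3; k++)
            null_bias += fabsf(bias[j][k]) <= epsilon;

    printf("null biases: %d\n", null_bias); // 3 (the two zeros and -1e-8)
    return 0;
}
```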


@ -54,12 +54,12 @@ def generate_data_mul():
def generate_data_conv(): def generate_data_conv():
values = [] values = []
output_dim = 40 output_width = 40
rows = 40 rows = 40
columns = 40 columns = 40
for i in range(10): for i in range(10):
values.append(avg([conv_matrix((i+1)*100, output_dim, rows, columns) for j in range(10)])) values.append(avg([conv_matrix((i+1)*100, output_width, rows, columns) for j in range(10)]))
print(f"Added ({(i+1)*100}, output_dim, rows, columns)") print(f"Added ({(i+1)*100}, output_width, rows, columns)")
with open("result_conv.json", "weights") as file: with open("result_conv.json", "weights") as file:
json.dump(values, file, indent=4) json.dump(values, file, indent=4)


@ -102,9 +102,9 @@ bool check_matrices_equality(float*** m1, float*** m2, int n, int p, int q, int
return true; return true;
} }
void run_convolution_test(int input_dim, int output_dim, int rows, int columns) { void run_convolution_test(int input_width, int output_width, int rows, int columns) {
assert(input_dim >= output_dim); assert(input_width >= output_width);
int k_size = input_dim - output_dim +1; int k_size = input_width - output_width +1;
// Generating random data // Generating random data
Kernel_cnn* kernel = (Kernel_cnn*)malloc(sizeof(Kernel_cnn)); Kernel_cnn* kernel = (Kernel_cnn*)malloc(sizeof(Kernel_cnn));
@ -145,11 +145,11 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
#endif #endif
} }
float*** input = create_matrix(kernel->rows, input_dim, input_dim, 5.0f); float*** input = create_matrix(kernel->rows, input_width, input_width, 5.0f);
float*** output_cpu = create_empty_matrix(kernel->columns, output_dim, output_dim); float*** output_cpu = create_empty_matrix(kernel->columns, output_width, output_width);
float*** output_gpu = create_empty_matrix(kernel->columns, output_dim, output_dim); float*** output_gpu = create_empty_matrix(kernel->columns, output_width, output_width);
//printf("(%d, %d, %d, %d) Data generation complete\n", rows, columns, input_dim, output_dim); //printf("(%d, %d, %d, %d) Data generation complete\n", rows, columns, input_width, output_width);
// Running the computations // Running the computations
@ -157,7 +157,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
double cpu_time_used, gpu_time_used; double cpu_time_used, gpu_time_used;
start = clock(); start = clock();
make_convolution_device(kernel, input, output_gpu, output_dim, 1); make_convolution_device(kernel, input, output_gpu, output_width, 1);
end = clock(); end = clock();
gpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; gpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
@ -165,15 +165,15 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
start = clock(); start = clock();
make_convolution_cpu(kernel, input, output_cpu, output_dim, 1); make_convolution_cpu(kernel, input, output_cpu, output_width, 1);
end = clock(); end = clock();
cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
printf("CPU: %lf\n", cpu_time_used); printf("CPU: %lf\n", cpu_time_used);
// Checking that the matrices are equal // Checking that the matrices are equal
//printf("(%d, %d, %d, %d) Checking equality.\n", rows, columns, input_dim, output_dim); //printf("(%d, %d, %d, %d) Checking equality.\n", rows, columns, input_width, output_width);
if (!check_matrices_equality(output_gpu, output_cpu, kernel->columns, output_dim, output_dim, kernel->k_size)) {// TODO: change acceptation if (!check_matrices_equality(output_gpu, output_cpu, kernel->columns, output_width, output_width, kernel->k_size)) {// TODO: change acceptation
//exit(1); //exit(1);
} }
//printf(GREEN "OK\n" RESET); //printf(GREEN "OK\n" RESET);
@ -200,9 +200,9 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
free(kernel->v_d_weights); free(kernel->v_d_weights);
#endif #endif
free_matrix(input, kernel->rows, input_dim); free_matrix(input, kernel->rows, input_width);
free_matrix(output_cpu, kernel->columns, output_dim); free_matrix(output_cpu, kernel->columns, output_width);
free_matrix(output_gpu, kernel->columns, output_dim); free_matrix(output_gpu, kernel->columns, output_width);
} }


@ -93,9 +93,9 @@ bool check_matrices_equality(float*** m1, float*** m2, int n, int p, int q, int
return true; return true;
} }
void run_convolution_test(int input_dim, int output_dim, int rows, int columns) { void run_convolution_test(int input_width, int output_width, int rows, int columns) {
assert(input_dim >= output_dim); assert(input_width >= output_width);
int k_size = input_dim - output_dim +1; int k_size = input_width - output_width +1;
// Generating random data // Generating random data
Kernel_cnn* kernel = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn)); Kernel_cnn* kernel = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn));
@ -104,12 +104,12 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
kernel->rows = rows; kernel->rows = rows;
kernel->columns = columns; kernel->columns = columns;
// bias[kernel->columns][dim_output][dim_output] // bias[kernel->columns][output_width][output_width]
kernel->bias = create_matrix(kernel->columns, output_dim, output_dim, 15.0f); kernel->bias = create_matrix(kernel->columns, output_width, output_width, 15.0f);
kernel->d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f); kernel->d_bias = create_matrix(kernel->columns, output_width, output_width, 1.5f);
#ifdef ADAM_CNN_BIAS #ifdef ADAM_CNN_BIAS
kernel->s_d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f); kernel->s_d_bias = create_matrix(kernel->columns, output_width, output_width, 1.5f);
kernel->v_d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f); kernel->v_d_bias = create_matrix(kernel->columns, output_width, output_width, 1.5f);
#endif #endif
// weights[rows][columns][k_size][k_size] // weights[rows][columns][k_size][k_size]
@ -128,11 +128,11 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
#endif #endif
} }
float*** input = create_matrix(kernel->rows, input_dim, input_dim, 5.0f); float*** input = create_matrix(kernel->rows, input_width, input_width, 5.0f);
float*** output_cpu = create_empty_matrix(kernel->columns, output_dim, output_dim); float*** output_cpu = create_empty_matrix(kernel->columns, output_width, output_width);
float*** output_gpu = create_empty_matrix(kernel->columns, output_dim, output_dim); float*** output_gpu = create_empty_matrix(kernel->columns, output_width, output_width);
printf("(%d, %d, %d, %d) Data generation complete\n", rows, columns, input_dim, output_dim); printf("(%d, %d, %d, %d) Data generation complete\n", rows, columns, input_width, output_width);
// Running the computations // Running the computations
@ -140,33 +140,33 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
double cpu_time_used, gpu_time_used; double cpu_time_used, gpu_time_used;
start_time = omp_get_wtime(); start_time = omp_get_wtime();
make_convolution_device(kernel, input, output_gpu, output_dim, 1); make_convolution_device(kernel, input, output_gpu, output_width, 1);
end_time = omp_get_wtime(); end_time = omp_get_wtime();
gpu_time_used = end_time - start_time; gpu_time_used = end_time - start_time;
printf("(%d, %d, %d, %d) Time used for GPU: %lf seconds\n", rows, columns, input_dim, output_dim, gpu_time_used); printf("(%d, %d, %d, %d) Time used for GPU: %lf seconds\n", rows, columns, input_width, output_width, gpu_time_used);
start_time = omp_get_wtime(); start_time = omp_get_wtime();
make_convolution_cpu(kernel, input, output_cpu, output_dim, 1); make_convolution_cpu(kernel, input, output_cpu, output_width, 1);
end_time = omp_get_wtime(); end_time = omp_get_wtime();
cpu_time_used = end_time - start_time; cpu_time_used = end_time - start_time;
printf("(%d, %d, %d, %d) Time used for CPU: %lf seconds\n", rows, columns, input_dim, output_dim, cpu_time_used); printf("(%d, %d, %d, %d) Time used for CPU: %lf seconds\n", rows, columns, input_width, output_width, cpu_time_used);
// Checking that the matrices are equal // Checking that the matrices are equal
printf("(%d, %d, %d, %d) Checking equality.\n", rows, columns, input_dim, output_dim); printf("(%d, %d, %d, %d) Checking equality.\n", rows, columns, input_width, output_width);
if (!check_matrices_equality(output_gpu, output_cpu, kernel->columns, output_dim, output_dim, kernel->k_size)) {// TODO: change acceptation if (!check_matrices_equality(output_gpu, output_cpu, kernel->columns, output_width, output_width, kernel->k_size)) {// TODO: change acceptation
exit(1); exit(1);
} }
printf(GREEN "OK\n" RESET); printf(GREEN "OK\n" RESET);
free_matrix(kernel->bias, kernel->columns, output_dim); free_matrix(kernel->bias, kernel->columns, output_width);
free_matrix(kernel->d_bias, kernel->columns, output_dim); free_matrix(kernel->d_bias, kernel->columns, output_width);
#ifdef ADAM_CNN_BIAS #ifdef ADAM_CNN_BIAS
free_matrix(kernel->s_d_bias, kernel->columns, output_dim); free_matrix(kernel->s_d_bias, kernel->columns, output_width);
free_matrix(kernel->v_d_bias, kernel->columns, output_dim); free_matrix(kernel->v_d_bias, kernel->columns, output_width);
#endif #endif
for (int i=0; i < kernel->rows; i++) { for (int i=0; i < kernel->rows; i++) {
@ -184,9 +184,9 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
gree(kernel->v_d_weights); gree(kernel->v_d_weights);
#endif #endif
free_matrix(input, kernel->rows, input_dim); free_matrix(input, kernel->rows, input_width);
free_matrix(output_cpu, kernel->columns, output_dim); free_matrix(output_cpu, kernel->columns, output_width);
free_matrix(output_gpu, kernel->columns, output_dim); free_matrix(output_gpu, kernel->columns, output_width);
} }
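The test above times each path with omp_get_wtime(), which returns wall-clock seconds. A minimal standalone sketch of that timing pattern (built with -fopenmp); the dummy loop merely stands in for the convolution call being measured:

```c
#include <omp.h>
#include <stdio.h>

int main(void) {
    double start_time = omp_get_wtime();

    volatile double acc = 0.; // dummy workload standing in for make_convolution_cpu
    for (int i = 0; i < 10000000; i++)
        acc += i * 0.5;

    double end_time = omp_get_wtime();
    printf("Time used: %lf seconds (acc=%f)\n", end_time - start_time, (double)acc);
    return 0;
}
```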