diff --git a/src/cnn/cnn.c b/src/cnn/cnn.c index 47cd002..ceecbc5 100644 --- a/src/cnn/cnn.c +++ b/src/cnn/cnn.c @@ -110,7 +110,7 @@ void forward_propagation(Network* network) { if (k_i->linearisation == 0) { // Vecteur -> Vecteur make_dense(k_i->nn, input[0][0], output[0][0], input_width, output_width); } else { // Matrice -> Vecteur - make_dense_linearised(k_i->nn, input, output[0][0], input_depth, input_width, output_width); + make_dense_linearized(k_i->nn, input, output[0][0], input_depth, input_width, output_width); } copy_input_to_input_z(output, output_a, 1, 1, output_width); choose_apply_function_vector(activation, output, output_width); diff --git a/src/cnn/include/make.h b/src/cnn/include/make.h index ea92d0d..a1fe694 100644 --- a/src/cnn/include/make.h +++ b/src/cnn/include/make.h @@ -43,6 +43,6 @@ extern "C" /* * Effectue une full connection qui passe d'une matrice à un vecteur */ -void make_dense_linearised(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output); +void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output); #endif \ No newline at end of file diff --git a/src/cnn/make.c b/src/cnn/make.c index 46d8732..b29e458 100644 --- a/src/cnn/make.c +++ b/src/cnn/make.c @@ -229,17 +229,17 @@ void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input, /* -* Dense linearised +* Dense linearized */ #ifdef __CUDACC__ -__global__ void make_dense_linearised_kernel(float** weights, float*** input, float* output, int depth_input, int dim_input, int size_output) { +__global__ void make_dense_linearized_kernel(float** weights, float* bias, float*** input, float* output, int depth_input, int dim_input, int size_output) { // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output if (idx >= size_output) { return; } - float f = 0; + float f = bias[idx]; for 
(int i=0; i < depth_input; i++) { for (int j=0; j < dim_input; j++) { @@ -251,24 +251,24 @@ __global__ void make_dense_linearised_kernel(float** weights, float*** input, fl output[idx] = f; } -void make_dense_linearised_device(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { +void make_dense_linearized_device(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { // Make computation dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1); dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z); - make_dense_linearised_kernel<<<gridSize, blockSize>>>(kernel->weights, input, output, depth_input, dim_input, size_output); + make_dense_linearized_kernel<<<gridSize, blockSize>>>(kernel->weights, kernel->bias, input, output, depth_input, dim_input, size_output); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); } #endif -void make_dense_linearised_cpu(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { +void make_dense_linearized_cpu(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { // input[depth_input][dim_input][dim_input] // output[size_output] float f; for (int l=0; l < size_output; l++) { - f = 0; + f = kernel->bias[l]; for (int i=0; i < depth_input; i++) { for (int j=0; j < dim_input; j++) { for (int k=0; k < dim_input; k++) { @@ -283,10 +283,10 @@ void make_dense_linearised_cpu(Kernel_nn* kernel, float*** input, float* output, #ifdef __CUDACC__ extern "C" #endif -void make_dense_linearised(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { +void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { #ifndef __CUDACC__ - make_dense_linearised_cpu(kernel, input, output, depth_input, dim_input, size_output); + make_dense_linearized_cpu(kernel, input, output, 
depth_input, dim_input, size_output); #else - make_dense_linearised_device(kernel, input, output, depth_input, dim_input, size_output); + make_dense_linearized_device(kernel, input, output, depth_input, dim_input, size_output); #endif } diff --git a/src/cnn/make.cu b/src/cnn/make.cu index 46d8732..b29e458 100644 --- a/src/cnn/make.cu +++ b/src/cnn/make.cu @@ -229,17 +229,17 @@ void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input, /* -* Dense linearised +* Dense linearized */ #ifdef __CUDACC__ -__global__ void make_dense_linearised_kernel(float** weights, float*** input, float* output, int depth_input, int dim_input, int size_output) { +__global__ void make_dense_linearized_kernel(float** weights, float* bias, float*** input, float* output, int depth_input, int dim_input, int size_output) { // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output if (idx >= size_output) { return; } - float f = 0; + float f = bias[idx]; for (int i=0; i < depth_input; i++) { for (int j=0; j < dim_input; j++) { @@ -251,24 +251,24 @@ __global__ void make_dense_linearised_kernel(float** weights, float*** input, fl output[idx] = f; } -void make_dense_linearised_device(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { +void make_dense_linearized_device(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { // Make computation dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1); dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z); - make_dense_linearised_kernel<<<gridSize, blockSize>>>(kernel->weights, input, output, depth_input, dim_input, size_output); + make_dense_linearized_kernel<<<gridSize, blockSize>>>(kernel->weights, kernel->bias, input, output, depth_input, dim_input, size_output); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); } #endif -void 
make_dense_linearised_cpu(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { +void make_dense_linearized_cpu(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { // input[depth_input][dim_input][dim_input] // output[size_output] float f; for (int l=0; l < size_output; l++) { - f = 0; + f = kernel->bias[l]; for (int i=0; i < depth_input; i++) { for (int j=0; j < dim_input; j++) { for (int k=0; k < dim_input; k++) { @@ -283,10 +283,10 @@ void make_dense_linearised_cpu(Kernel_nn* kernel, float*** input, float* output, #ifdef __CUDACC__ extern "C" #endif -void make_dense_linearised(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { +void make_dense_linearized(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { #ifndef __CUDACC__ - make_dense_linearised_cpu(kernel, input, output, depth_input, dim_input, size_output); + make_dense_linearized_cpu(kernel, input, output, depth_input, dim_input, size_output); #else - make_dense_linearised_device(kernel, input, output, depth_input, dim_input, size_output); + make_dense_linearized_device(kernel, input, output, depth_input, dim_input, size_output); #endif }