#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>

#include "include/struct.h"
#include "../common/include/utils.h"
#include "include/config.h"
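
// Returns a non-zero value when (x, y) lies inside [lower_bound, upper_bound)
// in both dimensions, i.e. when the corresponding convolution access stays
// within the input plane.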
#ifdef __CUDACC__
__host__ __device__
#endif
int convolution_not_outside(int x, int y, int lower_bound, int upper_bound) {
    return !(x < lower_bound || y < lower_bound || x >= upper_bound || y >= upper_bound);
}
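
// CPU forward convolution: for each output feature map i and each output
// position (j, k), accumulates bias[i][j][k] plus the weighted sum of the
// input values covered by the k_size x k_size filter, taking stride and
// zero-padding into account (out-of-bounds positions contribute nothing).
// The input is expected to have shape [kernel->rows][input_width][input_width],
// with input_width = output_width*stride - 2*padding + kernel->k_size - stride
// (the inverse of output_width = (input_width - k_size + 2*padding)/stride + 1).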
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) {
    // kernel is the convolution kernel applied to input
    // input[kernel->rows][input_width][input_width]
    // output[kernel->columns][output_width][output_width]

    int k_columns = kernel->columns;
    int k_rows = kernel->rows;
    int max_move = kernel->k_size - padding;
    int input_width = output_width*stride - 2*padding + kernel->k_size - stride;
    float f;

    for (int i=0; i < k_columns; i++) { // Filter
        for (int j=0; j < output_width; j++) { // Output row
            for (int k=0; k < output_width; k++) { // Output column
                f = kernel->bias[i][j][k];
                for (int a=0; a < k_rows; a++) { // Input channel
                    for (int b=-padding; b < max_move; b++) { // Filter row
                        for (int c=-padding; c < max_move; c++) { // Filter column
                            int x = stride*j + b;
                            int y = stride*k + c;
                            if (convolution_not_outside(x, y, 0, input_width)) {
                                f += kernel->weights[a][i][b][c]*input[a][x][y];
                            }
                        }
                    }
                }
                output[i][j][k] = f;
            }
        }
    }
}
#ifdef __CUDACC__
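
// CUDA kernel: each thread computes a single output element output[idx][idy][idz],
// mirroring the (i, j, k) loops of make_convolution_cpu; threads that fall
// outside the output volume return immediately.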
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) {
    // Respective equivalents of i, j and k in the loop performed by the CPU
    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < output_width

    int max_move = kernel->k_size - padding;
    int input_width = output_width*stride - 2*padding + kernel->k_size - stride;

    if (idx >= kernel->columns || idy >= output_width || idz >= output_width) {
        return;
    }

    float f = kernel->bias[idx][idy][idz];

    for (int a=0; a < kernel->rows; a++) { // Input channel
        for (int b=-padding; b < max_move; b++) { // Filter row
            for (int c=-padding; c < max_move; c++) { // Filter column
                int idy_2 = idy*stride + b;
                int idz_2 = idz*stride + c;
                if (convolution_not_outside(idy_2, idz_2, 0, input_width)) {
                    f += kernel->weights[a][idx][b][c]*input[a][idy_2][idz_2];
                }
            }
        }
    }

    output[idx][idy][idz] = f;
}
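
// GPU wrapper: launches make_convolution_kernel with one thread per output
// element, using the BLOCKSIZE_* constants and the i_div_up/gpuErrchk helpers
// provided by the included project headers, then waits for completion.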
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) {
    // Launch the computation on the GPU
    dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
    dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);

    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_width, stride, padding);

    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
}
#endif
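
// Public entry point: dispatches to the CPU or GPU implementation depending on
// whether the file is compiled with nvcc. extern "C" keeps the symbol callable
// from plain C code when the CUDA (C++) compiler is used.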
#ifdef __CUDACC__
extern "C"
#endif
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_width, int stride, int padding) {
#ifndef __CUDACC__
    make_convolution_cpu(kernel, input, output, output_width, stride, padding);
#else
    make_convolution_device(kernel, input, output, output_width, stride, padding);
#endif
}