mirror of
https://github.com/augustin64/projet-tipe
synced 2025-02-02 19:39:39 +01:00
Compare commits
4 Commits
c23a126faa
...
3672d07dff
Author | SHA1 | Date | |
---|---|---|---|
3672d07dff | |||
7c2c911976 | |||
|
af288166d6 | ||
|
6e022fbd44 |
16
Makefile
16
Makefile
@ -115,7 +115,19 @@ endif
|
||||
$(BUILDDIR)/cnn-preview: $(CNN_SRCDIR)/preview.c $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/colors.o $(BUILDDIR)/utils.o
|
||||
$(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS)
|
||||
|
||||
$(BUILDDIR)/cnn-export: $(CNN_SRCDIR)/export.c $(BUILDDIR)/cnn_free.o $(BUILDDIR)/cnn_neuron_io.o $(BUILDDIR)/utils.o $(BUILDDIR)/memory_management.o $(BUILDDIR)/colors.o
|
||||
$(BUILDDIR)/cnn-export: $(CNN_SRCDIR)/export.c \
|
||||
$(BUILDDIR)/cnn_free.o \
|
||||
$(BUILDDIR)/cnn_neuron_io.o \
|
||||
$(BUILDDIR)/utils.o \
|
||||
$(BUILDDIR)/memory_management.o \
|
||||
$(BUILDDIR)/cnn_cnn.o \
|
||||
$(BUILDDIR)/cnn_make.o \
|
||||
$(BUILDDIR)/cnn_backpropagation.o \
|
||||
$(BUILDDIR)/cnn_function.o \
|
||||
$(BUILDDIR)/cnn_convolution.o \
|
||||
$(BUILDDIR)/colors.o \
|
||||
$(BUILDDIR)/mnist.o \
|
||||
$(BUILDDIR)/cnn_jpeg.o
|
||||
$(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS)
|
||||
|
||||
$(BUILDDIR)/cnn_%.o: $(CNN_SRCDIR)/%.c $(CNN_SRCDIR)/include/%.h
|
||||
@ -224,4 +236,4 @@ $(CACHE_DIR)/mnist-reseau-cnn.bin: $(BUILDDIR)/cnn-main
|
||||
clean:
|
||||
rm -rf $(BUILDDIR)/*
|
||||
|
||||
#rm -f $(CACHE_DIR)/*
|
||||
#rm -f $(CACHE_DIR)/*
|
||||
|
@ -189,7 +189,7 @@ void forward_propagation(Network* network) {
|
||||
* On copie les valeurs de output dans output_z, puis on applique la fonction d'activation à output_z
|
||||
*/
|
||||
if (k_i->cnn) { // Convolution
|
||||
make_convolution(k_i->cnn, input, output, output_width);
|
||||
make_convolution(k_i->cnn, input, output, output_width, 1);
|
||||
copy_3d_array(output, output_z, output_depth, output_width, output_width);
|
||||
apply_function_to_matrix(activation, output, output_depth, output_width);
|
||||
}
|
||||
@ -208,9 +208,9 @@ void forward_propagation(Network* network) {
|
||||
return;
|
||||
} else { // Pooling sur une matrice
|
||||
if (pooling == AVG_POOLING) {
|
||||
make_average_pooling(input, output, input_width/output_width, output_depth, output_width);
|
||||
make_average_pooling(input, output, input_width/output_width, output_depth, output_width, input_width/output_width);
|
||||
} else if (pooling == MAX_POOLING) {
|
||||
make_max_pooling(input, output, input_width/output_width, output_depth, output_width);
|
||||
make_max_pooling(input, output, input_width/output_width, output_depth, output_width, input_width/output_width);
|
||||
} else {
|
||||
printf_error("Impossible de reconnaître le type de couche de pooling: ");
|
||||
printf("identifiant: %d, position: %d\n", pooling, i);
|
||||
|
@ -5,24 +5,27 @@
|
||||
#include "include/struct.h"
|
||||
#include "../include/utils.h"
|
||||
|
||||
|
||||
#include "include/config.h"
|
||||
|
||||
|
||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
|
||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||
// c'est le kernel de input
|
||||
// input[kernel->rows][kernel_k_size + output_dim-1][kernel_k_size + output_dim-1]
|
||||
// output[kernel->columns][output_dim][output_dim]
|
||||
|
||||
int k_size = kernel->k_size;
|
||||
int k_columns = kernel->columns;
|
||||
int k_rows = kernel->rows;
|
||||
float f;
|
||||
|
||||
for (int i=0; i < kernel->columns; i++) { // filtre
|
||||
for (int i=0; i < k_columns; i++) { // filtre
|
||||
for (int j=0; j < output_dim; j++) { // ligne de sortie
|
||||
for (int k=0; k < output_dim; k++) { // colonne de sortie
|
||||
f = kernel->bias[i][j][k];
|
||||
for (int a=0; a < kernel->rows; a++) { // Canal de couleur
|
||||
for (int b=0; b < kernel->k_size; b++) { // ligne du filtre
|
||||
for (int c=0; c < kernel->k_size; c++) { // colonne du filtre
|
||||
f += kernel->weights[a][i][b][c]*input[a][j+b][k+c];
|
||||
for (int a=0; a < k_rows; a++) { // Canal de couleur
|
||||
for (int b=0; b < k_size; b++) { // ligne du filtre
|
||||
for (int c=0; c < k_size; c++) { // colonne du filtre
|
||||
f += kernel->weights[a][i][b][c]*input[a][stride*j+b][stride*k+c];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -34,7 +37,7 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
|
||||
|
||||
#ifdef __CUDACC__
|
||||
|
||||
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
|
||||
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
|
||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
|
||||
@ -49,7 +52,7 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
|
||||
for (int a=0; a < kernel->rows; a++) {
|
||||
for (int b=0; b < kernel->k_size; b++) {
|
||||
for (int c=0; c < kernel->k_size; c++) {
|
||||
f += kernel->weights[a][idx][b][c]*input[a][idy+b][idz+c];
|
||||
f += kernel->weights[a][idx][b][c]*input[a][idy*stride+b][idz*stride+c];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -57,21 +60,21 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
|
||||
output[idx][idy][idz] = f;
|
||||
}
|
||||
|
||||
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
|
||||
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||
// Make computation
|
||||
dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
|
||||
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||
|
||||
make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim);
|
||||
make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim, stride);
|
||||
gpuErrchk( cudaPeekAtLastError() );
|
||||
gpuErrchk( cudaDeviceSynchronize() );
|
||||
}
|
||||
#endif
|
||||
|
||||
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
|
||||
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||
#ifndef __CUDACC__
|
||||
make_convolution_cpu(kernel, input, output, output_dim);
|
||||
make_convolution_cpu(kernel, input, output, output_dim, stride);
|
||||
#else
|
||||
make_convolution_device(kernel, input, output, output_dim);
|
||||
make_convolution_device(kernel, input, output, output_dim, stride);
|
||||
#endif
|
||||
}
|
@ -8,20 +8,24 @@
|
||||
#include "include/config.h"
|
||||
|
||||
|
||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
|
||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||
// c'est le kernel de input
|
||||
// input[kernel->rows][kernel_k_size + output_dim-1][kernel_k_size + output_dim-1]
|
||||
// output[kernel->columns][output_dim][output_dim]
|
||||
|
||||
int k_size = kernel->k_size;
|
||||
int k_columns = kernel->columns;
|
||||
int k_rows = kernel->rows;
|
||||
float f;
|
||||
|
||||
for (int i=0; i < kernel->columns; i++) { // filtre
|
||||
for (int i=0; i < k_columns; i++) { // filtre
|
||||
for (int j=0; j < output_dim; j++) { // ligne de sortie
|
||||
for (int k=0; k < output_dim; k++) { // colonne de sortie
|
||||
f = kernel->bias[i][j][k];
|
||||
for (int a=0; a < kernel->rows; a++) { // Canal de couleur
|
||||
for (int b=0; b < kernel->k_size; b++) { // ligne du filtre
|
||||
for (int c=0; c < kernel->k_size; c++) { // colonne du filtre
|
||||
f += kernel->weights[a][i][b][c]*input[a][j+b][k+c];
|
||||
for (int a=0; a < k_rows; a++) { // Canal de couleur
|
||||
for (int b=0; b < k_size; b++) { // ligne du filtre
|
||||
for (int c=0; c < k_size; c++) { // colonne du filtre
|
||||
f += kernel->weights[a][i][b][c]*input[a][stride*j+b][stride*k+c];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -33,7 +37,7 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
|
||||
|
||||
#ifdef __CUDACC__
|
||||
|
||||
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
|
||||
__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
|
||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
|
||||
@ -48,7 +52,7 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
|
||||
for (int a=0; a < kernel->rows; a++) {
|
||||
for (int b=0; b < kernel->k_size; b++) {
|
||||
for (int c=0; c < kernel->k_size; c++) {
|
||||
f += kernel->weights[a][idx][b][c]*input[a][idy+b][idz+c];
|
||||
f += kernel->weights[a][idx][b][c]*input[a][idy*stride+b][idz*stride+c];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -56,22 +60,21 @@ __global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, floa
|
||||
output[idx][idy][idz] = f;
|
||||
}
|
||||
|
||||
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
|
||||
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||
// Make computation
|
||||
dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
|
||||
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||
|
||||
make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim);
|
||||
make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim, stride);
|
||||
gpuErrchk( cudaPeekAtLastError() );
|
||||
gpuErrchk( cudaDeviceSynchronize() );
|
||||
}
|
||||
#endif
|
||||
|
||||
extern "C"
|
||||
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
|
||||
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride) {
|
||||
#ifndef __CUDACC__
|
||||
make_convolution_cpu(kernel, input, output, output_dim);
|
||||
make_convolution_cpu(kernel, input, output, output_dim, stride);
|
||||
#else
|
||||
make_convolution_device(kernel, input, output, output_dim);
|
||||
make_convolution_device(kernel, input, output, output_dim, stride);
|
||||
#endif
|
||||
}
|
150
src/cnn/export.c
150
src/cnn/export.c
@ -1,15 +1,32 @@
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "include/free.h"
|
||||
#include "include/struct.h"
|
||||
#include "../include/colors.h"
|
||||
#include "include/backpropagation.h"
|
||||
#include "include/neuron_io.h"
|
||||
#include "../include/colors.h"
|
||||
#include "../include/mnist.h"
|
||||
#include "include/struct.h"
|
||||
#include "include/jpeg.h"
|
||||
#include "include/free.h"
|
||||
#include "include/cnn.h"
|
||||
|
||||
|
||||
void help(char* call) {
|
||||
printf("Usage: %s ( print-poids-kernel-cnn ) [OPTIONS]\n\n", call);
|
||||
printf("Usage: %s ( print-poids-kernel-cnn | visual-propagation ) [OPTIONS]\n\n", call);
|
||||
printf("OPTIONS:\n");
|
||||
printf("\tprint-poids-kernel-cnn\n");
|
||||
printf("\t\t--modele | -m [FILENAME]\tFichier contenant le réseau entraîné\n");
|
||||
printf("\tvisual-propagation\n");
|
||||
printf("\t\t--modele | -m [FILENAME]\tFichier contenant le réseau entraîné\n");
|
||||
printf("\t\t--images | -i [FILENAME]\tFichier contenant les images.\n");
|
||||
printf("\t\t--numero | -n [numero]\tNuméro de l'image dont la propagation veut être visualisée\n");
|
||||
printf("\t\t--out | -o [BASE_FILENAME]\tLes images seront stockées dans ${out}_layer-${numéro de couche}_feature-${kernel_numero}.jpeg\n");
|
||||
|
||||
printf("\n");
|
||||
printf_warning("Seul les datasets de type MNIST sont pris en charge pour le moment\n");
|
||||
}
|
||||
|
||||
|
||||
@ -65,6 +82,87 @@ void print_poids_ker_cnn(char* modele) {
|
||||
}
|
||||
|
||||
|
||||
void write_image(float** data, int width, char* base_filename, int layer_id, int kernel_id) {
|
||||
int filename_length = strlen(base_filename) + (int)log10(layer_id+1)+1 + (int)log10(kernel_id+1)+1 + 21;
|
||||
char* filename = (char*)malloc(sizeof(char)*filename_length);
|
||||
|
||||
sprintf(filename, "%s_layer-%d_feature-%d.jpeg", base_filename, layer_id, kernel_id);
|
||||
|
||||
|
||||
imgRawImage* image = (imgRawImage*)malloc(sizeof(imgRawImage));
|
||||
|
||||
image->numComponents = 3;
|
||||
image->width = width;
|
||||
image->height = width;
|
||||
image->lpData = (unsigned char*)malloc(sizeof(unsigned char)*width*width*3);
|
||||
|
||||
for (int i=0; i < width; i++) {
|
||||
for (int j=0; j < width; j++) {
|
||||
float color = fmax(fmin(data[i][j], 1.), 0.)*255;
|
||||
|
||||
image->lpData[(i*width+j)*3] = color;
|
||||
image->lpData[(i*width+j)*3 + 1] = color;
|
||||
image->lpData[(i*width+j)*3 + 2] = color;
|
||||
}
|
||||
}
|
||||
|
||||
storeJpegImageFile(image, filename);
|
||||
|
||||
free(image->lpData);
|
||||
free(image);
|
||||
free(filename);
|
||||
}
|
||||
|
||||
|
||||
void visual_propagation(char* modele_file, char* images_file, char* out_base, int numero) {
|
||||
Network* network = read_network(modele_file);
|
||||
|
||||
int* mnist_parameters = read_mnist_images_parameters(images_file);
|
||||
int*** images = read_mnist_images(images_file);
|
||||
|
||||
int nb_elem = mnist_parameters[0];
|
||||
|
||||
int width = mnist_parameters[1];
|
||||
int height = mnist_parameters[2];
|
||||
free(mnist_parameters);
|
||||
|
||||
if (numero < 0 || numero >= nb_elem) {
|
||||
printf_error("Numéro d'image spécifié invalide.");
|
||||
printf(" Le fichier contient %d images.\n", nb_elem);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Forward propagation
|
||||
write_image_in_network_32(images[numero], height, width, network->input[0][0], false);
|
||||
forward_propagation(network);
|
||||
|
||||
for (int i=0; i < network->size-1; i++) {
|
||||
if (i == 0) {
|
||||
write_image(network->input[0][0], width, out_base, 0, 0);
|
||||
} else {
|
||||
if ((!network->kernel[i]->cnn)&&(!network->kernel[i]->nn)) {
|
||||
for (int j=0; j < network->depth[i]; j++) {
|
||||
write_image(network->input[i][j], network->width[i], out_base, i, j);
|
||||
}
|
||||
} else if (!network->kernel[i]->cnn) {
|
||||
// Couche de type NN, on n'affiche rien
|
||||
} else {
|
||||
write_image(network->input[i][0], network->width[i], out_base, i, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free_network(network);
|
||||
for (int i=0; i < nb_elem; i++) {
|
||||
for (int j=0; j < width; j++) {
|
||||
free(images[i][j]);
|
||||
}
|
||||
free(images[i]);
|
||||
}
|
||||
free(images);
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
if (argc < 2) {
|
||||
@ -92,6 +190,50 @@ int main(int argc, char* argv[]) {
|
||||
print_poids_ker_cnn(modele);
|
||||
return 0;
|
||||
}
|
||||
if (! strcmp(argv[1], "visual-propagation")) {
|
||||
char* modele = NULL; // Fichier contenant le modèle
|
||||
char* images = NULL; // Dossier contenant les images
|
||||
char* out_base = NULL; // Préfixe du nom de fichier de sortie
|
||||
int numero = -1; // Numéro de l'image dans le dataset
|
||||
int i = 2;
|
||||
while (i < argc) {
|
||||
if ((! strcmp(argv[i], "--modele"))||(! strcmp(argv[i], "-m"))) {
|
||||
modele = argv[i+1];
|
||||
i += 2;
|
||||
} else if ((! strcmp(argv[i], "--images"))||(! strcmp(argv[i], "-i"))) {
|
||||
images = argv[i+1];
|
||||
i += 2;
|
||||
} else if ((! strcmp(argv[i], "--out"))||(! strcmp(argv[i], "-o"))) {
|
||||
out_base = argv[i+1];
|
||||
i += 2;
|
||||
} else if ((! strcmp(argv[i], "--numero"))||(! strcmp(argv[i], "-n"))) {
|
||||
numero = strtol(argv[i+1], NULL, 10);
|
||||
i += 2;
|
||||
} else {
|
||||
printf_warning("Option choisie inconnue: ");
|
||||
printf("%s\n", argv[i]);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
if (!modele) {
|
||||
printf_error("Pas de modèle à utiliser spécifié.\n");
|
||||
return 1;
|
||||
}
|
||||
if (!images) {
|
||||
printf_error("Pas de fichier d'images spécifié.\n");
|
||||
return 1;
|
||||
}
|
||||
if (!out_base) {
|
||||
printf_error("Pas de fichier de sortie spécifié.\n");
|
||||
return 1;
|
||||
}
|
||||
if (numero == -1) {
|
||||
printf_error("Pas de numéro d'image spécifié.\n");
|
||||
return 1;
|
||||
}
|
||||
visual_propagation(modele, images, out_base, numero);
|
||||
return 0;
|
||||
}
|
||||
printf_error("Option choisie non reconnue: ");
|
||||
printf("%s\n", argv[1]);
|
||||
help(argv[0]);
|
||||
|
@ -3,21 +3,21 @@
|
||||
/*
|
||||
* Effectue la convolution naïvement sur le processeur
|
||||
*/
|
||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim);
|
||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
/*
|
||||
* Kernel de la convolution sur carte graphique
|
||||
*/
|
||||
__global__ void make_convolution_kernel(int k_size, int columns, int rows, float* bias, size_t pitch_bias, float**** weights, size_t pitch_weights, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_dim);
|
||||
__global__ void make_convolution_kernel(int k_size, int columns, int rows, float* bias, size_t pitch_bias, float**** weights, size_t pitch_weights, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_dim, int stride);
|
||||
|
||||
/*
|
||||
* Effectue la convolution naïvement sur la carte graphique
|
||||
*/
|
||||
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim);
|
||||
void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Détermine si la convolution peut-être faite sur la carte graphique au moment de la compilation
|
||||
*/
|
||||
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim);
|
||||
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
|
@ -33,6 +33,11 @@ typedef struct jpegDataset {
|
||||
*/
|
||||
imgRawImage* loadJpegImageFile(char* lpFilename);
|
||||
|
||||
/*
|
||||
* Write a JPEG image to lpFilename
|
||||
*/
|
||||
int storeJpegImageFile(struct imgRawImage* lpImage, char* lpFilename);
|
||||
|
||||
/*
|
||||
* Load a complete dataset from its path
|
||||
*/
|
||||
|
@ -6,12 +6,12 @@
|
||||
/*
|
||||
* Performs a convolution on the CPU using the given stride
|
||||
*/
|
||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim);
|
||||
void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
|
||||
|
||||
/*
|
||||
* Effectue la convolution sur le CPU ou GPU
|
||||
*/
|
||||
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim);
|
||||
void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim, int stride);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
@ -19,7 +19,7 @@ extern "C"
|
||||
/*
|
||||
* Performs an average pooling over size x size windows, moving by the given stride
|
||||
*/
|
||||
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_dim);
|
||||
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
@ -27,7 +27,7 @@ extern "C"
|
||||
/*
|
||||
* Performs a max pooling over size x size windows, moving by the given stride
|
||||
*/
|
||||
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_dim);
|
||||
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_dim, int stride);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
|
@ -15,17 +15,19 @@ typedef struct Kernel_cnn {
|
||||
int k_size; // k_size = dim_input - dim_output + 1
|
||||
int rows; // Depth de l'input
|
||||
int columns; // Depth de l'output
|
||||
|
||||
float*** bias; // bias[columns][dim_output][dim_output]
|
||||
float*** d_bias; // d_bias[columns][dim_output][dim_output]
|
||||
#ifdef ADAM_CNN_BIAS
|
||||
float*** s_d_bias; // s_d_bias[columns][dim_output][dim_output]
|
||||
float*** v_d_bias; // v_d_bias[columns][dim_output][dim_output]
|
||||
float*** s_d_bias; // s_d_bias[columns][dim_output][dim_output]
|
||||
float*** v_d_bias; // v_d_bias[columns][dim_output][dim_output]
|
||||
#endif
|
||||
|
||||
float**** weights; // weights[rows][columns][k_size][k_size]
|
||||
float**** d_weights; // d_weights[rows][columns][k_size][k_size]
|
||||
#ifdef ADAM_CNN_WEIGHTS
|
||||
float**** s_d_weights; // s_d_weights[rows][columns][k_size][k_size]
|
||||
float**** v_d_weights; // v_d_weights[rows][columns][k_size][k_size]
|
||||
float**** s_d_weights; // s_d_weights[rows][columns][k_size][k_size]
|
||||
float**** v_d_weights; // v_d_weights[rows][columns][k_size][k_size]
|
||||
#endif
|
||||
} Kernel_cnn;
|
||||
|
||||
@ -33,23 +35,26 @@ typedef struct Kernel_nn {
|
||||
// Noyau ayant une couche vectorielle en sortie
|
||||
int size_input; // Nombre d'éléments en entrée
|
||||
int size_output; // Nombre d'éléments en sortie
|
||||
|
||||
float* bias; // bias[size_output]
|
||||
float* d_bias; // d_bias[size_output]
|
||||
#ifdef ADAM_DENSE_BIAS
|
||||
float* s_d_bias; // s_d_bias[size_output]
|
||||
float* v_d_bias; // v_d_bias[size_output]
|
||||
float* s_d_bias; // s_d_bias[size_output]
|
||||
float* v_d_bias; // v_d_bias[size_output]
|
||||
#endif
|
||||
|
||||
float** weights; // weight[size_input][size_output]
|
||||
float** d_weights; // d_weights[size_input][size_output]
|
||||
#ifdef ADAM_DENSE_WEIGHTS
|
||||
float** s_d_weights; // s_d_weights[size_input][size_output]
|
||||
float** v_d_weights; // v_d_weights[size_input][size_output]
|
||||
float** s_d_weights; // s_d_weights[size_input][size_output]
|
||||
float** v_d_weights; // v_d_weights[size_input][size_output]
|
||||
#endif
|
||||
} Kernel_nn;
|
||||
|
||||
typedef struct Kernel {
|
||||
Kernel_cnn* cnn; // NULL si ce n'est pas un cnn
|
||||
Kernel_nn* nn; // NULL si ce n'est pas un nn
|
||||
|
||||
int activation; // Id de la fonction d'activation et -Id de sa dérivée
|
||||
int linearisation; // 1 si c'est la linéarisation d'une couche, 0 sinon
|
||||
int pooling; // 0 si pas pooling, 1 si average_pooling, 2 si max_pooling
|
||||
@ -60,10 +65,13 @@ typedef struct Network{
|
||||
int dropout; // Probabilité d'abandon d'un neurone dans [0, 100] (entiers)
|
||||
float learning_rate; // Taux d'apprentissage du réseau
|
||||
int initialisation; // Id du type d'initialisation
|
||||
|
||||
int max_size; // Taille du tableau contenant le réseau
|
||||
int size; // Taille actuelle du réseau (size ≤ max_size)
|
||||
|
||||
int* width; // width[size]
|
||||
int* depth; // depth[size]
|
||||
|
||||
Kernel** kernel; // kernel[size], contient tous les kernels
|
||||
float**** input_z; // Tableau de toutes les couches du réseau input_z[size][couche->depth][couche->width][couche->width]
|
||||
float**** input; // input[i] = f(input_z[i]) où f est la fonction d'activation de la couche i
|
||||
|
@ -74,6 +74,52 @@ imgRawImage* loadJpegImageFile(char* lpFilename) {
|
||||
return lpNewImage;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Write a raw image to lpFilename as a JPEG (quality 100).
 *
 * lpImage:    image to store; lpData holds height rows of
 *             width * numComponents bytes. numComponents must be
 *             1 (grayscale) or 3 (RGB).
 * lpFilename: path of the file to create (overwritten if it exists)
 *
 * Returns 0 on success, 1 on invalid arguments, unsupported component
 * count, or when the file cannot be opened or closed.
 *
 * NOTE: the standard libjpeg error manager is installed, so an error raised
 * by libjpeg itself terminates the process instead of returning here.
 */
int storeJpegImageFile(imgRawImage* lpImage, char* lpFilename) {
    struct jpeg_compress_struct info;
    struct jpeg_error_mgr err;

    unsigned char* lpRowBuffer[1];
    J_COLOR_SPACE colorSpace;
    size_t rowStride;

    FILE* fHandle;

    if (lpImage == NULL || lpImage->lpData == NULL || lpFilename == NULL) {
        return 1;
    }

    // Describe the buffer as it really is instead of assuming RGB:
    // a 1-component image read with a 3-byte stride would overrun lpData.
    switch (lpImage->numComponents) {
        case 1:
            colorSpace = JCS_GRAYSCALE;
            break;
        case 3:
            colorSpace = JCS_RGB;
            break;
        default:
            return 1;
    }
    rowStride = (size_t)lpImage->width * (size_t)lpImage->numComponents;

    fHandle = fopen(lpFilename, "wb");
    if(fHandle == NULL) {
        #ifdef DEBUG
            fprintf(stderr, "%s:%u Failed to open output file %s\n", __FILE__, __LINE__, lpFilename);
        #endif
        return 1;
    }

    info.err = jpeg_std_error(&err);
    jpeg_create_compress(&info);

    jpeg_stdio_dest(&info, fHandle);

    info.image_width = lpImage->width;
    info.image_height = lpImage->height;
    info.input_components = lpImage->numComponents;
    info.in_color_space = colorSpace;

    jpeg_set_defaults(&info);
    jpeg_set_quality(&info, 100, TRUE);

    jpeg_start_compress(&info, TRUE);

    /* Write every scanline ... */
    while(info.next_scanline < info.image_height) {
        lpRowBuffer[0] = &(lpImage->lpData[(size_t)info.next_scanline * rowStride]);
        jpeg_write_scanlines(&info, lpRowBuffer, 1);
    }

    jpeg_finish_compress(&info);
    jpeg_destroy_compress(&info);

    // fclose flushes the stream: a failure here means the file is incomplete
    if (fclose(fHandle) != 0) {
        #ifdef DEBUG
            fprintf(stderr, "%s:%u Failed to close output file %s\n", __FILE__, __LINE__, lpFilename);
        #endif
        return 1;
    }
    return 0;
}
|
||||
|
||||
|
||||
jpegDataset* loadJpegDataset(char* folderPath) {
|
||||
jpegDataset* dataset = (jpegDataset*)malloc(sizeof(jpegDataset));
|
||||
imgRawImage* image;
|
||||
|
@ -15,7 +15,7 @@
|
||||
* Average Pooling
|
||||
*/
|
||||
#ifdef __CUDACC__
|
||||
__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width) {
|
||||
__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
|
||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
|
||||
@ -30,24 +30,24 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int
|
||||
|
||||
for (int a=0; a < size; a++) {
|
||||
for (int b=0; b < size; b++) {
|
||||
sum += input[idx][size*idy +a][size*idz +b];
|
||||
sum += input[idx][stride*idy +a][stride*idz +b];
|
||||
}
|
||||
}
|
||||
output[idx][idy][idz] = sum/(float)n;
|
||||
}
|
||||
|
||||
void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width) {
|
||||
void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
// Make computation
|
||||
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
|
||||
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||
|
||||
make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width);
|
||||
make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride);
|
||||
gpuErrchk( cudaPeekAtLastError() );
|
||||
gpuErrchk( cudaDeviceSynchronize() );
|
||||
}
|
||||
#endif
|
||||
|
||||
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width) {
|
||||
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
// input[output_depth][output_width+size-1][output_width+size-1]
|
||||
// output[output_depth][output_width][output_width]
|
||||
float sum;
|
||||
@ -59,7 +59,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
|
||||
sum = 0;
|
||||
for (int a=0; a < size; a++) {
|
||||
for (int b=0; b < size; b++) {
|
||||
sum += input[i][size*j +a][size*k +b];
|
||||
sum += input[i][stride*j +a][stride*k +b];
|
||||
}
|
||||
}
|
||||
output[i][j][k] = sum/(float)n;
|
||||
@ -71,11 +71,11 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width) {
|
||||
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
#ifndef __CUDACC__
|
||||
make_average_pooling_cpu(input, output, size, output_depth, output_width);
|
||||
make_average_pooling_cpu(input, output, size, output_depth, output_width, stride);
|
||||
#else
|
||||
make_average_pooling_device(input, output, size, output_depth, output_width);
|
||||
make_average_pooling_device(input, output, size, output_depth, output_width, stride);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -87,7 +87,7 @@ void make_average_pooling(float*** input, float*** output, int size, int output_
|
||||
* Max Pooling
|
||||
*/
|
||||
#ifdef __CUDACC__
|
||||
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width) {
|
||||
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
|
||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
|
||||
@ -102,25 +102,25 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
|
||||
|
||||
for (int a=0; a < size; a++) {
|
||||
for (int b=0; b < size; b++) {
|
||||
temp = input[idx][size*idy +a][size*idz +b];
|
||||
temp = input[idx][stride*idy +a][stride*idz +b];
|
||||
m = m > temp ? m : temp; // max(m, temp)
|
||||
}
|
||||
}
|
||||
output[idx][idy][idz] = m;
|
||||
}
|
||||
|
||||
void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width) {
|
||||
void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
// Make computation
|
||||
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
|
||||
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||
|
||||
make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width);
|
||||
make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride);
|
||||
gpuErrchk( cudaPeekAtLastError() );
|
||||
gpuErrchk( cudaDeviceSynchronize() );
|
||||
}
|
||||
#endif
|
||||
|
||||
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width) {
|
||||
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
// input[output_depth][output_width+size-1][output_width+size-1]
|
||||
// output[output_depth][output_width][output_width]
|
||||
float m;
|
||||
@ -130,7 +130,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
|
||||
m = -FLT_MAX;
|
||||
for (int a=0; a < size; a++) {
|
||||
for (int b=0; b < size; b++) {
|
||||
m = fmaxf(m, input[i][size*j +a][size*k +b]);
|
||||
m = fmaxf(m, input[i][stride*j +a][stride*k +b]);
|
||||
}
|
||||
}
|
||||
output[i][j][k] = m;
|
||||
@ -142,11 +142,11 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width) {
|
||||
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
#ifndef __CUDACC__
|
||||
make_max_pooling_cpu(input, output, size, output_depth, output_width);
|
||||
make_max_pooling_cpu(input, output, size, output_depth, output_width, stride);
|
||||
#else
|
||||
make_max_pooling_device(input, output, size, output_depth, output_width);
|
||||
make_max_pooling_device(input, output, size, output_depth, output_width, stride);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
* Average Pooling
|
||||
*/
|
||||
#ifdef __CUDACC__
|
||||
__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width) {
|
||||
__global__ void make_average_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
|
||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
|
||||
@ -30,24 +30,24 @@ __global__ void make_average_pooling_kernel(float*** input, float*** output, int
|
||||
|
||||
for (int a=0; a < size; a++) {
|
||||
for (int b=0; b < size; b++) {
|
||||
sum += input[idx][size*idy +a][size*idz +b];
|
||||
sum += input[idx][stride*idy +a][stride*idz +b];
|
||||
}
|
||||
}
|
||||
output[idx][idy][idz] = sum/(float)n;
|
||||
}
|
||||
|
||||
void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width) {
|
||||
void make_average_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
// Make computation
|
||||
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
|
||||
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||
|
||||
make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width);
|
||||
make_average_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride);
|
||||
gpuErrchk( cudaPeekAtLastError() );
|
||||
gpuErrchk( cudaDeviceSynchronize() );
|
||||
}
|
||||
#endif
|
||||
|
||||
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width) {
|
||||
void make_average_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
// input[output_depth][output_width+size-1][output_width+size-1]
|
||||
// output[output_depth][output_width][output_width]
|
||||
float sum;
|
||||
@ -59,7 +59,7 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
|
||||
sum = 0;
|
||||
for (int a=0; a < size; a++) {
|
||||
for (int b=0; b < size; b++) {
|
||||
sum += input[i][size*j +a][size*k +b];
|
||||
sum += input[i][stride*j +a][stride*k +b];
|
||||
}
|
||||
}
|
||||
output[i][j][k] = sum/(float)n;
|
||||
@ -71,11 +71,11 @@ void make_average_pooling_cpu(float*** input, float*** output, int size, int out
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width) {
|
||||
void make_average_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
#ifndef __CUDACC__
|
||||
make_average_pooling_cpu(input, output, size, output_depth, output_width);
|
||||
make_average_pooling_cpu(input, output, size, output_depth, output_width, stride);
|
||||
#else
|
||||
make_average_pooling_device(input, output, size, output_depth, output_width);
|
||||
make_average_pooling_device(input, output, size, output_depth, output_width, stride);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -87,7 +87,7 @@ void make_average_pooling(float*** input, float*** output, int size, int output_
|
||||
* Max Pooling
|
||||
*/
|
||||
#ifdef __CUDACC__
|
||||
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width) {
|
||||
__global__ void make_max_pooling_kernel(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < output_depth
|
||||
int idy = threadIdx.y + blockDim.y*blockIdx.y; // < output_width
|
||||
@ -102,25 +102,25 @@ __global__ void make_max_pooling_kernel(float*** input, float*** output, int siz
|
||||
|
||||
for (int a=0; a < size; a++) {
|
||||
for (int b=0; b < size; b++) {
|
||||
temp = input[idx][size*idy +a][size*idz +b];
|
||||
temp = input[idx][stride*idy +a][stride*idz +b];
|
||||
m = m > temp ? m : temp; // max(m, temp)
|
||||
}
|
||||
}
|
||||
output[idx][idy][idz] = m;
|
||||
}
|
||||
|
||||
void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width) {
|
||||
void make_max_pooling_device(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
// Make computation
|
||||
dim3 gridSize(i_div_up(output_depth, BLOCKSIZE_x), i_div_up(output_width, BLOCKSIZE_y), i_div_up(output_width, BLOCKSIZE_z));
|
||||
dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
|
||||
|
||||
make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width);
|
||||
make_max_pooling_kernel<<<gridSize, blockSize>>>(input, output, size, output_depth, output_width, stride);
|
||||
gpuErrchk( cudaPeekAtLastError() );
|
||||
gpuErrchk( cudaDeviceSynchronize() );
|
||||
}
|
||||
#endif
|
||||
|
||||
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width) {
|
||||
void make_max_pooling_cpu(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
// input[output_depth][output_width+size-1][output_width+size-1]
|
||||
// output[output_depth][output_width][output_width]
|
||||
float m;
|
||||
@ -130,7 +130,7 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
|
||||
m = -FLT_MAX;
|
||||
for (int a=0; a < size; a++) {
|
||||
for (int b=0; b < size; b++) {
|
||||
m = fmaxf(m, input[i][size*j +a][size*k +b]);
|
||||
m = fmaxf(m, input[i][stride*j +a][stride*k +b]);
|
||||
}
|
||||
}
|
||||
output[i][j][k] = m;
|
||||
@ -142,11 +142,11 @@ void make_max_pooling_cpu(float*** input, float*** output, int size, int output_
|
||||
#ifdef __CUDACC__
|
||||
extern "C"
|
||||
#endif
|
||||
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width) {
|
||||
void make_max_pooling(float*** input, float*** output, int size, int output_depth, int output_width, int stride) {
|
||||
#ifndef __CUDACC__
|
||||
make_max_pooling_cpu(input, output, size, output_depth, output_width);
|
||||
make_max_pooling_cpu(input, output, size, output_depth, output_width, stride);
|
||||
#else
|
||||
make_max_pooling_device(input, output, size, output_depth, output_width);
|
||||
make_max_pooling_device(input, output, size, output_depth, output_width, stride);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -157,7 +157,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
|
||||
double cpu_time_used, gpu_time_used;
|
||||
|
||||
start = clock();
|
||||
make_convolution_device(kernel, input, output_gpu, output_dim);
|
||||
make_convolution_device(kernel, input, output_gpu, output_dim, 1);
|
||||
end = clock();
|
||||
|
||||
gpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
|
||||
@ -165,7 +165,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
|
||||
|
||||
|
||||
start = clock();
|
||||
make_convolution_cpu(kernel, input, output_cpu, output_dim);
|
||||
make_convolution_cpu(kernel, input, output_cpu, output_dim, 1);
|
||||
end = clock();
|
||||
|
||||
cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
|
||||
|
@ -140,7 +140,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
|
||||
double cpu_time_used, gpu_time_used;
|
||||
|
||||
start_time = omp_get_wtime();
|
||||
make_convolution_device(kernel, input, output_gpu, output_dim);
|
||||
make_convolution_device(kernel, input, output_gpu, output_dim, 1);
|
||||
end_time = omp_get_wtime();
|
||||
|
||||
|
||||
@ -149,7 +149,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
|
||||
|
||||
|
||||
start_time = omp_get_wtime();
|
||||
make_convolution_cpu(kernel, input, output_cpu, output_dim);
|
||||
make_convolution_cpu(kernel, input, output_cpu, output_dim, 1);
|
||||
end_time = omp_get_wtime();
|
||||
|
||||
cpu_time_used = end_time - start_time;
|
||||
|
Loading…
Reference in New Issue
Block a user