diff --git a/Makefile b/Makefile
index 5bd174b..20df01a 100644
--- a/Makefile
+++ b/Makefile
@@ -58,6 +58,9 @@ $(BUILDDIR)/mnist-preview: $(MNIST_SRCDIR)/preview.c $(BUILDDIR)/mnist.o
 $(BUILDDIR)/mnist.o: $(MNIST_SRCDIR)/mnist.c $(MNIST_SRCDIR)/include/mnist.h
 	$(CC) -c $< -o $@ $(CFLAGS)
 
+$(BUILDDIR)/mnist.cuda.o: $(MNIST_SRCDIR)/mnist.c $(MNIST_SRCDIR)/include/mnist.h
+	$(CC) -c $< -o $@ $(CFLAGS) -DUSE_CUDA -lcuda -I/opt/cuda/include
+
 $(BUILDDIR)/mnist_%.o: $(MNIST_SRCDIR)/%.c $(MNIST_SRCDIR)/include/%.h
 	$(CC) -c $< -o $@ $(CFLAGS)
 
@@ -67,22 +70,60 @@ $(BUILDDIR)/mnist_%.o: $(MNIST_SRCDIR)/%.c $(MNIST_SRCDIR)/include/%.h
 #
 cnn: $(BUILDDIR)/cnn-main $(BUILDDIR)/cnn-main-cuda $(BUILDDIR)/cnn-preview;
 
-$(BUILDDIR)/cnn-main: $(CNN_SRCDIR)/main.c $(BUILDDIR)/cnn_train.o $(BUILDDIR)/cnn_test_network.o $(BUILDDIR)/cnn_cnn.o $(BUILDDIR)/cnn_creation.o $(BUILDDIR)/cnn_initialisation.o $(BUILDDIR)/cnn_make.o $(BUILDDIR)/cnn_neuron_io.o $(BUILDDIR)/cnn_function.o $(BUILDDIR)/cnn_utils.o $(BUILDDIR)/cnn_update.o $(BUILDDIR)/cnn_free.o $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/cnn_convolution.o $(BUILDDIR)/cnn_backpropagation.o $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o
+$(BUILDDIR)/cnn-main: $(CNN_SRCDIR)/main.c \
+		$(BUILDDIR)/cnn_train.o \
+		$(BUILDDIR)/cnn_test_network.o \
+		$(BUILDDIR)/cnn_cnn.o \
+		$(BUILDDIR)/cnn_creation.o \
+		$(BUILDDIR)/cnn_initialisation.o \
+		$(BUILDDIR)/cnn_make.o \
+		$(BUILDDIR)/cnn_neuron_io.o \
+		$(BUILDDIR)/cnn_function.o \
+		$(BUILDDIR)/cnn_utils.o \
+		$(BUILDDIR)/cnn_update.o \
+		$(BUILDDIR)/cnn_free.o \
+		$(BUILDDIR)/cnn_jpeg.o \
+		$(BUILDDIR)/cnn_convolution.o \
+		$(BUILDDIR)/cnn_backpropagation.o \
+		$(BUILDDIR)/colors.o \
+		$(BUILDDIR)/mnist.o \
+		$(BUILDDIR)/utils.o
 	$(CC) $^ -o $@ $(CFLAGS)
 
-$(BUILDDIR)/cnn-main-cuda: $(BUILDDIR)/cnn_main.o $(BUILDDIR)/cnn_train.o $(BUILDDIR)/cnn_test_network.o $(BUILDDIR)/cnn_cnn.o $(BUILDDIR)/cnn_creation.o $(BUILDDIR)/cnn_initialisation.o $(BUILDDIR)/cnn_make.o $(BUILDDIR)/cnn_neuron_io.o $(BUILDDIR)/cnn_function.o $(BUILDDIR)/cnn_utils.o $(BUILDDIR)/cnn_update.o $(BUILDDIR)/cnn_free.o $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/cnn_cuda_convolution.o $(BUILDDIR)/cnn_backpropagation.o $(BUILDDIR)/cuda_utils.o $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o
+$(BUILDDIR)/cnn-main-cuda: $(BUILDDIR)/cnn_main.cuda.o \
+		$(BUILDDIR)/cnn_train.cuda.o \
+		$(BUILDDIR)/cnn_test_network.cuda.o \
+		$(BUILDDIR)/cnn_cnn.cuda.o \
+		$(BUILDDIR)/cnn_creation.cuda.o \
+		$(BUILDDIR)/cnn_initialisation.cuda.o \
+		$(BUILDDIR)/cnn_make.cuda.o \
+		$(BUILDDIR)/cnn_neuron_io.cuda.o \
+		$(BUILDDIR)/cnn_function.cuda.o \
+		$(BUILDDIR)/cnn_utils.cuda.o \
+		$(BUILDDIR)/cnn_update.cuda.o \
+		$(BUILDDIR)/cnn_free.cuda.o \
+		$(BUILDDIR)/cnn_jpeg.cuda.o \
+		$(BUILDDIR)/cnn_cuda_convolution.o \
+		$(BUILDDIR)/cnn_backpropagation.cuda.o \
+		$(BUILDDIR)/colors.cuda.o \
+		$(BUILDDIR)/mnist.cuda.o \
+		$(BUILDDIR)/utils.cuda.o \
+		$(BUILDDIR)/cuda_utils.o
 ifndef NVCC_INSTALLED
 	@echo "$(NVCC) not found, skipping"
 else
 	$(NVCC) $(NVCCFLAGS) $^ -o $@
 endif
 
-$(BUILDDIR)/cnn-preview: $(CNN_SRCDIR)/preview.c $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/colors.o
+$(BUILDDIR)/cnn-preview: $(CNN_SRCDIR)/preview.c $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/colors.o $(BUILDDIR)/utils.o
 	$(CC) $^ -o $@ $(CFLAGS)
 
 $(BUILDDIR)/cnn_%.o: $(CNN_SRCDIR)/%.c $(CNN_SRCDIR)/include/%.h
 	$(CC) -c $< -o $@ $(CFLAGS)
 
+$(BUILDDIR)/cnn_%.cuda.o: $(CNN_SRCDIR)/%.c $(CNN_SRCDIR)/include/%.h
+	$(CC) -c $< -o $@ $(CFLAGS) -DUSE_CUDA -lcuda -I/opt/cuda/include
+
 $(BUILDDIR)/cnn_cuda_%.o: $(CNN_SRCDIR)/%.cu $(CNN_SRCDIR)/include/%.h
 ifndef NVCC_INSTALLED
 	@echo "$(NVCC) not found, skipping"
@@ -95,6 +136,9 @@ endif
 $(BUILDDIR)/%.o: $(SRCDIR)/%.c $(SRCDIR)/include/%.h
 	$(CC) -c $< -o $@ $(CFLAGS)
 
+$(BUILDDIR)/%.cuda.o: $(SRCDIR)/%.c $(SRCDIR)/include/%.h
+	$(CC) -c $< -o $@ $(CFLAGS) -DUSE_CUDA -lcuda -I/opt/cuda/include
+
 $(BUILDDIR)/cuda_%.o: $(SRCDIR)/%.cu $(SRCDIR)/include/%.h
 ifndef NVCC_INSTALLED
 	@echo "$(NVCC) not found, skipping"
@@ -116,14 +160,18 @@ prepare-tests:
 	@rm -f $(BUILDDIR)/test-*
 
-build/test-cnn_%: test/cnn_%.c $(CNN_OBJ) $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o
+build/test-cnn_%: test/cnn_%.c $(CNN_OBJ) $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o $(BUILDDIR)/utils.o
 	$(CC) $^ -o $@ $(CFLAGS)
 
 # mnist.o est déjà inclus en tant que mnist_mnist.o
 build/test-mnist_%: test/mnist_%.c $(MNIST_OBJ) $(BUILDDIR)/colors.o
 	$(CC) $^ -o $@ $(CFLAGS)
 
-$(BUILDDIR)/test-cnn_%: test/cnn_%.cu $(BUILDDIR)/cnn_cuda_%.o $(BUILDDIR)/cuda_utils.o $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o
+$(BUILDDIR)/test-cnn_%: test/cnn_%.cu \
+		$(BUILDDIR)/cnn_cuda_%.o \
+		$(BUILDDIR)/cuda_utils.o \
+		$(BUILDDIR)/colors.cuda.o \
+		$(BUILDDIR)/mnist.cuda.o
 ifndef NVCC_INSTALLED
 	@echo "$(NVCC) not found, skipping"
 else
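The new *.cuda.o pattern rules recompile the unchanged C sources a second time with -DUSE_CUDA, which switches the allocator declared in src/include/utils.h from plain malloc() to CUDA managed memory (see src/utils.c further down). A minimal sketch of the effect, with a hypothetical make_buffer() as the example (not part of the patch):

    #include "include/utils.h"

    /* Compiled into foo.o this calls malloc(); compiled into foo.cuda.o
     * (with -DUSE_CUDA) the very same line returns memory obtained from
     * cudaMallocManaged(). */
    float* make_buffer(int n) {
        return (float*)nalloc(n * sizeof(float));
    }
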
diff --git a/src/cnn/convolution.c b/src/cnn/convolution.c
index 7403137..f33dd8e 100644
--- a/src/cnn/convolution.c
+++ b/src/cnn/convolution.c
@@ -3,36 +3,8 @@
 #include <stdbool.h>
 
 #include "include/struct.h"
-#ifdef __CUDACC__
-    #include "../include/utils.h"
-#else
-bool check_cuda_compatibility() {
-    #ifdef __CUDACC__
-    int nDevices;
-    cudaDeviceProp prop;
+#include "../include/utils.h"
 
-    cudaGetDeviceCount(&nDevices);
-    if (nDevices == 0) {
-        printf("Pas d'utilisation du GPU\n\n");
-        return false;
-    }
-
-    printf("GPUs disponibles:\n");
-
-    for (int i=0; i < nDevices; i++) {
-        cudaGetDeviceProperties(&prop, i);
-        printf(" - %s\n", prop.name);
-    }
-
-    cudaGetDeviceProperties(&prop, 0);
-    printf("Utilisation du GPU: " BLUE "%s" RESET " (Compute capability: %d.%d)\n\n", prop.name, prop.major, prop.minor);
-    return true;
-    #else
-    printf("Pas d'utilisation du GPU\n\n");
-    return false;
-    #endif
-}
-#endif
 
 #define BLOCKSIZE_x 16
 #define BLOCKSIZE_y 8
@@ -64,107 +36,40 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
 
 #ifdef __CUDACC__
-__global__ void make_convolution_kernel(int k_size, int columns, int rows, float*** bias, size_t pitch_bias, float**** w, size_t pitch_w, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_dim) {
+__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
     // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
     int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
     int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
     int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_dim, k_size)
 
-    int input_dim = output_dim+k_size - 1;
-
-    if (idx >= columns || idy >= output_dim || idz >= output_dim) {
+    if (idx >= kernel->columns || idy >= output_dim || idz >= output_dim) {
         return;
     }
 
-    float* bias_offset;
-    float* w_offset;
-    float* input_offset;
-    float* output_offset;
+    float f = kernel->bias[idx][idy][idz];
 
-    bias_offset = (float*)((char*)bias + (idx*output_dim+idy)*pitch_bias);
-    float f = bias_offset[idz];
-
-    for (int a=0; a < rows; a++) {
-        for (int b=0; b < k_size; b++) {
-            for (int c=0; c < k_size; c++) {
-                w_offset = (float*)((char*)w + ((a*columns + idx)*k_size+b)*pitch_w);
-                input_offset = (float*)((char*)input + (a*input_dim + idy+b)*pitch_input);
-                f += w_offset[c]*input_offset[idz+c];
+    for (int a=0; a < kernel->rows; a++) {
+        for (int b=0; b < kernel->k_size; b++) {
+            for (int c=0; c < kernel->k_size; c++) {
+                f += kernel->w[a][idx][b][c]*input[a][idy+b][idz+c];
             }
         }
     }
 
-    output_offset = (float*)((char*)output + (idx*output_dim+idy)*pitch_output);
-    output_offset[idz] = f;
+    output[idx][idy][idz] = f;
 }
 
 void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
-    // Copy arrays
-    size_t pitch_input;
-    size_t pitch_output;
-    size_t pitch_bias;
-    size_t pitch_weight;
-    float*** input_dev;
-    float*** output_dev;
-    float*** kernel_bias;
-    float**** kernel_weight;
-
-    int input_dim = output_dim+kernel->k_size - 1;
-
-    // Copy ***input
-    gpuErrchk( cudaMallocPitch((void**)&input_dev, &pitch_input, input_dim*sizeof(float), kernel->rows*input_dim));
-    for (int i=0; i < kernel->rows; i++) {
-        for (int j=0; j < input_dim; j++) {
-            gpuErrchk( cudaMemcpy((void*)((char*)input_dev + (i*input_dim+j)*pitch_input), (const void*)&(input[i][j][0]), input_dim*sizeof(float), cudaMemcpyHostToDevice));
-        }
-    }
-
-    // cudaMalloc ***output
-    gpuErrchk( cudaMallocPitch((void**)&output_dev, &pitch_output, output_dim*sizeof(float), kernel->columns*output_dim));
-
-    // Copy ***Kernel bias
-    gpuErrchk( cudaMallocPitch((void**)&kernel_bias, &pitch_bias, output_dim*sizeof(float), kernel->columns*output_dim));
-    for (int i=0; i < kernel->columns; i++) {
-        for (int j=0; j < output_dim; j++) {
-            gpuErrchk( cudaMemcpy((void*)((char*)kernel_bias + (i*output_dim+j)*pitch_bias), (const void*)&(kernel->bias[i][j][0]), output_dim*sizeof(float), cudaMemcpyHostToDevice));
-        }
-    }
-
-    // Copy ****Kernel weights
-    gpuErrchk( cudaMallocPitch((void**)&kernel_weight, &pitch_weight, kernel->k_size*sizeof(float), (kernel->rows*kernel->columns*kernel->k_size)));
-    for (int i=0; i < kernel->rows; i++) {
-        for (int j=0; j < kernel->columns; j++) {
-            for (int k=0; k < kernel->k_size; k++) {
-                gpuErrchk( cudaMemcpy((void*)((char*)kernel_weight + ((i*kernel->columns+j)*kernel->k_size+k)*pitch_weight), (const void*)&(kernel->w[i][j][k][0]), kernel->k_size*sizeof(float), cudaMemcpyHostToDevice));
-            }
-        }
-    }
-
     // Make computation
     dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    make_convolution_kernel<<<gridSize, blockSize>>>(kernel->k_size, kernel->columns, kernel->rows, kernel_bias, pitch_bias, kernel_weight, pitch_weight, input_dev, pitch_input, output_dev, pitch_output, output_dim);
-    gpuErrchk( cudaPeekAtLastError() );
-    gpuErrchk( cudaDeviceSynchronize() );
-
-    // Copy output back
-    for (int i=0; i < kernel->columns; i++) {
-        for (int j=0; j < output_dim; j++) {
-            gpuErrchk( cudaMemcpy((void*)&(output[i][j][0]), (const void*)((char*)output_dev + (i*output_dim+j)*pitch_output), output_dim*sizeof(float), cudaMemcpyDeviceToHost));
-        }
-    }
-
-    // Free all the allocated memory
-    gpuErrchk( cudaFree(input_dev) );
-    gpuErrchk( cudaFree(output_dev) );
-    gpuErrchk( cudaFree(kernel_bias) );
-    gpuErrchk( cudaFree(kernel_weight) );
+    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim);
 
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }
 #endif
-
 void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
     #ifndef __CUDACC__
     make_convolution_cpu(kernel, input, output, output_dim);
diff --git a/src/cnn/convolution.cu b/src/cnn/convolution.cu
index 072cdf1..8e36160 100644
--- a/src/cnn/convolution.cu
+++ b/src/cnn/convolution.cu
@@ -3,36 +3,8 @@
 #include <stdbool.h>
 
 #include "include/struct.h"
-#ifdef __CUDACC__
-    #include "../include/utils.h"
-#else
-bool check_cuda_compatibility() {
-    #ifdef __CUDACC__
-    int nDevices;
-    cudaDeviceProp prop;
+#include "../include/utils.h"
 
-    cudaGetDeviceCount(&nDevices);
-    if (nDevices == 0) {
-        printf("Pas d'utilisation du GPU\n\n");
-        return false;
-    }
-
-    printf("GPUs disponibles:\n");
-
-    for (int i=0; i < nDevices; i++) {
-        cudaGetDeviceProperties(&prop, i);
-        printf(" - %s\n", prop.name);
-    }
-
-    cudaGetDeviceProperties(&prop, 0);
-    printf("Utilisation du GPU: " BLUE "%s" RESET " (Compute capability: %d.%d)\n\n", prop.name, prop.major, prop.minor);
-    return true;
-    #else
-    printf("Pas d'utilisation du GPU\n\n");
-    return false;
-    #endif
-}
-#endif
 
 #define BLOCKSIZE_x 16
 #define BLOCKSIZE_y 8
@@ -64,101 +36,35 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
 
 #ifdef __CUDACC__
-__global__ void make_convolution_kernel(int k_size, int columns, int rows, float*** bias, size_t pitch_bias, float**** w, size_t pitch_w, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_dim) {
+__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
     // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
     int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
     int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
     int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_dim, k_size)
 
-    int input_dim = output_dim+k_size - 1;
-
-    if (idx >= columns || idy >= output_dim || idz >= output_dim) {
+    if (idx >= kernel->columns || idy >= output_dim || idz >= output_dim) {
        return;
     }
 
-    float* bias_offset;
-    float* w_offset;
-    float* input_offset;
-    float* output_offset;
+    float f = kernel->bias[idx][idy][idz];
 
-    bias_offset = (float*)((char*)bias + (idx*output_dim+idy)*pitch_bias);
-    float f = bias_offset[idz];
-
-    for (int a=0; a < rows; a++) {
-        for (int b=0; b < k_size; b++) {
-            for (int c=0; c < k_size; c++) {
-                w_offset = (float*)((char*)w + ((a*columns + idx)*k_size+b)*pitch_w);
-                input_offset = (float*)((char*)input + (a*input_dim + idy+b)*pitch_input);
-                f += w_offset[c]*input_offset[idz+c];
+    for (int a=0; a < kernel->rows; a++) {
+        for (int b=0; b < kernel->k_size; b++) {
+            for (int c=0; c < kernel->k_size; c++) {
+                f += kernel->w[a][idx][b][c]*input[a][idy+b][idz+c];
             }
         }
     }
 
-    output_offset = (float*)((char*)output + (idx*output_dim+idy)*pitch_output);
-    output_offset[idz] = f;
+    output[idx][idy][idz] = f;
 }
 
 void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
-    // Copy arrays
-    size_t pitch_input;
-    size_t pitch_output;
-    size_t pitch_bias;
-    size_t pitch_weight;
-    float*** input_dev;
-    float*** output_dev;
-    float*** kernel_bias;
-    float**** kernel_weight;
-
-    int input_dim = output_dim+kernel->k_size - 1;
-
-    // Copy ***input
-    gpuErrchk( cudaMallocPitch((void**)&input_dev, &pitch_input, input_dim*sizeof(float), kernel->rows*input_dim));
-    for (int i=0; i < kernel->rows; i++) {
-        for (int j=0; j < input_dim; j++) {
-            gpuErrchk( cudaMemcpy((void*)((char*)input_dev + (i*input_dim+j)*pitch_input), (const void*)&(input[i][j][0]), input_dim*sizeof(float), cudaMemcpyHostToDevice));
-        }
-    }
-
-    // cudaMalloc ***output
-    gpuErrchk( cudaMallocPitch((void**)&output_dev, &pitch_output, output_dim*sizeof(float), kernel->columns*output_dim));
-
-    // Copy ***Kernel bias
-    gpuErrchk( cudaMallocPitch((void**)&kernel_bias, &pitch_bias, output_dim*sizeof(float), kernel->columns*output_dim));
-    for (int i=0; i < kernel->columns; i++) {
-        for (int j=0; j < output_dim; j++) {
-            gpuErrchk( cudaMemcpy((void*)((char*)kernel_bias + (i*output_dim+j)*pitch_bias), (const void*)&(kernel->bias[i][j][0]), output_dim*sizeof(float), cudaMemcpyHostToDevice));
-        }
-    }
-
-    // Copy ****Kernel weights
-    gpuErrchk( cudaMallocPitch((void**)&kernel_weight, &pitch_weight, kernel->k_size*sizeof(float), (kernel->rows*kernel->columns*kernel->k_size)));
-    for (int i=0; i < kernel->rows; i++) {
-        for (int j=0; j < kernel->columns; j++) {
-            for (int k=0; k < kernel->k_size; k++) {
-                gpuErrchk( cudaMemcpy((void*)((char*)kernel_weight + ((i*kernel->columns+j)*kernel->k_size+k)*pitch_weight), (const void*)&(kernel->w[i][j][k][0]), kernel->k_size*sizeof(float), cudaMemcpyHostToDevice));
-            }
-        }
-    }
-
     // Make computation
     dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    make_convolution_kernel<<<gridSize, blockSize>>>(kernel->k_size, kernel->columns, kernel->rows, kernel_bias, pitch_bias, kernel_weight, pitch_weight, input_dev, pitch_input, output_dev, pitch_output, output_dim);
-    gpuErrchk( cudaPeekAtLastError() );
-    gpuErrchk( cudaDeviceSynchronize() );
-
-    // Copy output back
-    for (int i=0; i < kernel->columns; i++) {
-        for (int j=0; j < output_dim; j++) {
-            gpuErrchk( cudaMemcpy((void*)&(output[i][j][0]), (const void*)((char*)output_dev + (i*output_dim+j)*pitch_output), output_dim*sizeof(float), cudaMemcpyDeviceToHost));
-        }
-    }
-
-    // Free all the allocated memory
-    gpuErrchk( cudaFree(input_dev) );
-    gpuErrchk( cudaFree(output_dev) );
-    gpuErrchk( cudaFree(kernel_bias) );
-    gpuErrchk( cudaFree(kernel_weight) );
+    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim);
 
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }
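Both convolution files shed their cudaMallocPitch()/cudaMemcpy() staging: since every network buffer now comes from nalloc(), i.e. cudaMallocManaged() in CUDA builds, the same Kernel_cnn*, input and output pointers are valid on host and device, and make_convolution_kernel can dereference them directly. A minimal sketch of that idea, assuming a CUDA build where nalloc() wraps cudaMallocManaged():

    __global__ void scale(float* v, int n) {
        int i = threadIdx.x + blockDim.x*blockIdx.x;
        if (i < n) v[i] *= 2.0f;  // the device dereferences the managed pointer
    }

    void demo(int n) {
        float* v = (float*)nalloc(n*sizeof(float));
        for (int i=0; i < n; i++) v[i] = i;  // the host writes the same pointer
        scale<<<i_div_up(n, 256), 256>>>(v, n);
        gpuErrchk( cudaDeviceSynchronize() );  // sync before the host reads v again
        gree(v);
    }
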
diff --git a/src/cnn/creation.c b/src/cnn/creation.c
index 862ffb5..dd71469 100644
--- a/src/cnn/creation.c
+++ b/src/cnn/creation.c
@@ -3,6 +3,7 @@
 
 #include "include/initialisation.h"
 #include "include/function.h"
+#include "../include/utils.h"
 #include "include/creation.h"
 
@@ -10,19 +11,19 @@ Network* create_network(int max_size, float learning_rate, int dropout, int init
     if (dropout < 0 || dropout > 100) {
         printf("Erreur, la probabilité de dropout n'est pas respecté, elle doit être comprise entre 0 et 100\n");
     }
-    Network* network = (Network*)malloc(sizeof(Network));
+    Network* network = (Network*)nalloc(sizeof(Network));
     network->learning_rate = learning_rate;
     network->max_size = max_size;
     network->dropout = dropout;
     network->initialisation = initialisation;
     network->size = 1;
-    network->input = (float****)malloc(sizeof(float***)*max_size);
-    network->input_z = (float****)malloc(sizeof(float***)*max_size);
-    network->kernel = (Kernel**)malloc(sizeof(Kernel*)*(max_size-1));
-    network->width = (int*)malloc(sizeof(int*)*max_size);
-    network->depth = (int*)malloc(sizeof(int*)*max_size);
+    network->input = (float****)nalloc(sizeof(float***)*max_size);
+    network->input_z = (float****)nalloc(sizeof(float***)*max_size);
+    network->kernel = (Kernel**)nalloc(sizeof(Kernel*)*(max_size-1));
+    network->width = (int*)nalloc(sizeof(int*)*max_size);
+    network->depth = (int*)nalloc(sizeof(int*)*max_size);
     for (int i=0; i < max_size-1; i++) {
-        network->kernel[i] = (Kernel*)malloc(sizeof(Kernel));
+        network->kernel[i] = (Kernel*)nalloc(sizeof(Kernel));
     }
     network->width[0] = input_dim;
     network->depth[0] = input_depth;
@@ -57,11 +58,11 @@ Network* create_simple_one(float learning_rate, int dropout, int activation, int
 }
 
 void create_a_cube_input_layer(Network* network, int pos, int depth, int dim) {
-    network->input[pos] = (float***)malloc(sizeof(float**)*depth);
+    network->input[pos] = (float***)nalloc(sizeof(float**)*depth);
     for (int i=0; i < depth; i++) {
-        network->input[pos][i] = (float**)malloc(sizeof(float*)*dim);
+        network->input[pos][i] = (float**)nalloc(sizeof(float*)*dim);
         for (int j=0; j < dim; j++) {
-            network->input[pos][i][j] = (float*)malloc(sizeof(float)*dim);
+            network->input[pos][i][j] = (float*)nalloc(sizeof(float)*dim);
         }
     }
     network->width[pos] = dim;
@@ -69,11 +70,11 @@ void create_a_cube_input_layer(Network* network, int pos, int depth, int dim) {
 }
 
 void create_a_cube_input_z_layer(Network* network, int pos, int depth, int dim) {
-    network->input_z[pos] = (float***)malloc(sizeof(float**)*depth);
+    network->input_z[pos] = (float***)nalloc(sizeof(float**)*depth);
     for (int i=0; i < depth; i++) {
-        network->input_z[pos][i] = (float**)malloc(sizeof(float*)*dim);
+        network->input_z[pos][i] = (float**)nalloc(sizeof(float*)*dim);
         for (int j=0; j < dim; j++) {
-            network->input_z[pos][i][j] = (float*)malloc(sizeof(float)*dim);
+            network->input_z[pos][i][j] = (float*)nalloc(sizeof(float)*dim);
         }
     }
     network->width[pos] = dim;
@@ -81,17 +82,17 @@ void create_a_cube_input_z_layer(Network* network, int pos, int depth, int dim)
 }
 
 void create_a_line_input_layer(Network* network, int pos, int dim) {
-    network->input[pos] = (float***)malloc(sizeof(float**));
-    network->input[pos][0] = (float**)malloc(sizeof(float*));
-    network->input[pos][0][0] = (float*)malloc(sizeof(float)*dim);
+    network->input[pos] = (float***)nalloc(sizeof(float**));
+    network->input[pos][0] = (float**)nalloc(sizeof(float*));
+    network->input[pos][0][0] = (float*)nalloc(sizeof(float)*dim);
     network->width[pos] = dim;
     network->depth[pos] = 1;
 }
 
 void create_a_line_input_z_layer(Network* network, int pos, int dim) {
-    network->input_z[pos] = (float***)malloc(sizeof(float**));
-    network->input_z[pos][0] = (float**)malloc(sizeof(float*));
-    network->input_z[pos][0][0] = (float*)malloc(sizeof(float)*dim);
+    network->input_z[pos] = (float***)nalloc(sizeof(float**));
+    network->input_z[pos][0] = (float**)nalloc(sizeof(float*));
+    network->input_z[pos][0][0] = (float*)nalloc(sizeof(float)*dim);
     network->width[pos] = dim;
     network->depth[pos] = 1;
 }
@@ -132,34 +133,40 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act
     network->kernel[k_pos]->nn = NULL;
     network->kernel[k_pos]->activation = activation;
     network->kernel[k_pos]->linearisation = 0;
-    network->kernel[k_pos]->cnn = (Kernel_cnn*)malloc(sizeof(Kernel_cnn));
+    network->kernel[k_pos]->cnn = (Kernel_cnn*)nalloc(sizeof(Kernel_cnn));
     Kernel_cnn* cnn = network->kernel[k_pos]->cnn;
 
     cnn->k_size = kernel_size;
     cnn->rows = depth_input;
     cnn->columns = depth_output;
-    cnn->w = (float****)malloc(sizeof(float***)*depth_input);
-    cnn->d_w = (float****)malloc(sizeof(float***)*depth_input);
+    cnn->w = (float****)nalloc(sizeof(float***)*depth_input);
+    cnn->d_w = (float****)nalloc(sizeof(float***)*depth_input);
     for (int i=0; i < depth_input; i++) {
-        cnn->w[i] = (float***)malloc(sizeof(float**)*depth_output);
-        cnn->d_w[i] = (float***)malloc(sizeof(float**)*depth_output);
+        cnn->w[i] = (float***)nalloc(sizeof(float**)*depth_output);
+        cnn->d_w[i] = (float***)nalloc(sizeof(float**)*depth_output);
         for (int j=0; j < depth_output; j++) {
-            cnn->w[i][j] = (float**)malloc(sizeof(float*)*kernel_size);
-            cnn->d_w[i][j] = (float**)malloc(sizeof(float*)*kernel_size);
+            cnn->w[i][j] = (float**)nalloc(sizeof(float*)*kernel_size);
+            cnn->d_w[i][j] = (float**)nalloc(sizeof(float*)*kernel_size);
             for (int k=0; k < kernel_size; k++) {
-                cnn->w[i][j][k] = (float*)malloc(sizeof(float)*kernel_size);
-                cnn->d_w[i][j][k] = (float*)calloc(kernel_size, sizeof(float));
+                cnn->w[i][j][k] = (float*)nalloc(sizeof(float)*kernel_size);
+                cnn->d_w[i][j][k] = (float*)nalloc(sizeof(float)*kernel_size);
+                for (int l=0; l < kernel_size; l++) {
+                    cnn->d_w[i][j][k][l] = 0.;
+                }
             }
         }
     }
-    cnn->bias = (float***)malloc(sizeof(float**)*depth_output);
-    cnn->d_bias = (float***)malloc(sizeof(float**)*depth_output);
+    cnn->bias = (float***)nalloc(sizeof(float**)*depth_output);
+    cnn->d_bias = (float***)nalloc(sizeof(float**)*depth_output);
     for (int i=0; i < depth_output; i++) {
-        cnn->bias[i] = (float**)malloc(sizeof(float*)*bias_size);
-        cnn->d_bias[i] = (float**)malloc(sizeof(float*)*bias_size);
+        cnn->bias[i] = (float**)nalloc(sizeof(float*)*bias_size);
+        cnn->d_bias[i] = (float**)nalloc(sizeof(float*)*bias_size);
         for (int j=0; j < bias_size; j++) {
-            cnn->bias[i][j] = (float*)malloc(sizeof(float)*bias_size);
-            cnn->d_bias[i][j] = (float*)calloc(bias_size, sizeof(float));
+            cnn->bias[i][j] = (float*)nalloc(sizeof(float)*bias_size);
+            cnn->d_bias[i][j] = (float*)nalloc(sizeof(float)*bias_size);
+            for (int k=0; k < bias_size; k++) {
+                cnn->d_bias[i][j][k] = 0.;
+            }
         }
     }
     int n_in = network->width[n-1]*network->width[n-1]*network->depth[n-1];
@@ -180,20 +187,29 @@ void add_dense(Network* network, int output_units, int activation) {
         return;
     }
     network->kernel[k_pos]->cnn = NULL;
-    network->kernel[k_pos]->nn = (Kernel_nn*)malloc(sizeof(Kernel_nn));
+    network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(sizeof(Kernel_nn));
     Kernel_nn* nn = network->kernel[k_pos]->nn;
     network->kernel[k_pos]->activation = activation;
     network->kernel[k_pos]->linearisation = 0;
+
     nn->input_units = input_units;
     nn->output_units = output_units;
-    nn->bias = (float*)malloc(sizeof(float)*output_units);
-    nn->d_bias = (float*)calloc(output_units, sizeof(float));
-    nn->weights = (float**)malloc(sizeof(float*)*input_units);
-    nn->d_weights = (float**)malloc(sizeof(float*)*input_units);
-    for (int i=0; i < input_units; i++) {
-        nn->weights[i] = (float*)malloc(sizeof(float)*output_units);
-        nn->d_weights[i] = (float*)calloc(output_units, sizeof(float));
+
+    nn->bias = (float*)nalloc(sizeof(float)*output_units);
+    nn->d_bias = (float*)nalloc(sizeof(float)*output_units);
+    for (int i=0; i < output_units; i++) {
+        nn->d_bias[i] = 0.;
     }
+
+    nn->weights = (float**)nalloc(sizeof(float*)*input_units);
+    nn->d_weights = (float**)nalloc(sizeof(float*)*input_units);
+    for (int i=0; i < input_units; i++) {
+        nn->weights[i] = (float*)nalloc(sizeof(float)*output_units);
+        nn->d_weights[i] = (float*)nalloc(sizeof(float)*output_units);
+        for (int j=0; j < output_units; j++) {
+            nn->d_weights[i][j] = 0.;
+        }
+    }
+
     initialisation_1d_matrix(network->initialisation, nn->bias, output_units, input_units, output_units);
     initialisation_2d_matrix(network->initialisation, nn->weights, input_units, output_units, input_units, output_units);
     create_a_line_input_layer(network, n, output_units);
@@ -212,20 +228,26 @@ void add_dense_linearisation(Network* network, int output_units, int activation)
         return;
     }
     network->kernel[k_pos]->cnn = NULL;
-    network->kernel[k_pos]->nn = (Kernel_nn*)malloc(sizeof(Kernel_nn));
+    network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(sizeof(Kernel_nn));
     Kernel_nn* nn = network->kernel[k_pos]->nn;
     network->kernel[k_pos]->activation = activation;
     network->kernel[k_pos]->linearisation = 1;
     nn->input_units = input_units;
     nn->output_units = output_units;
 
-    nn->bias = (float*)malloc(sizeof(float)*output_units);
-    nn->d_bias = (float*)calloc(output_units, sizeof(float));
-    nn->weights = (float**)malloc(sizeof(float*)*input_units);
-    nn->d_weights = (float**)malloc(sizeof(float*)*input_units);
+    nn->bias = (float*)nalloc(sizeof(float)*output_units);
+    nn->d_bias = (float*)nalloc(sizeof(float)*output_units);
+    for (int i=0; i < output_units; i++) {
+        nn->d_bias[i] = 0.;
+    }
+    nn->weights = (float**)nalloc(sizeof(float*)*input_units);
+    nn->d_weights = (float**)nalloc(sizeof(float*)*input_units);
     for (int i=0; i < input_units; i++) {
-        nn->weights[i] = (float*)malloc(sizeof(float)*output_units);
-        nn->d_weights[i] = (float*)calloc(output_units, sizeof(float));
+        nn->weights[i] = (float*)nalloc(sizeof(float)*output_units);
+        nn->d_weights[i] = (float*)nalloc(sizeof(float)*output_units);
+        for (int j=0; j < output_units; j++) {
+            nn->d_weights[i][j] = 0.;
+        }
     }
     initialisation_1d_matrix(network->initialisation, nn->bias, output_units, input_units, output_units);
     initialisation_2d_matrix(network->initialisation, nn->weights, input_units, output_units, input_units, output_units);
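In creation.c the former calloc() calls disappear because nalloc() has no zero-initialising variant, so the d_w, d_bias and d_weights buffers are now cleared with explicit loops. A hypothetical helper, not part of the patch (it assumes <string.h> and that the pointer is host-accessible, which holds for managed memory), could factor that pattern out:

    /* Hypothetical helper: nalloc() + memset() reproduces what
     * calloc() used to guarantee. */
    void* nalloc_zero(size_t sz) {
        void* ptr = nalloc(sz);
        memset(ptr, 0, sz);  // host-side clear of the fresh buffer
        return ptr;
    }
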
diff --git a/src/cnn/free.c b/src/cnn/free.c
index 629b773..dab0d7d 100644
--- a/src/cnn/free.c
+++ b/src/cnn/free.c
@@ -1,28 +1,30 @@
 #include <stdlib.h>
 #include <stdio.h>
 
+#include "../include/utils.h"
+
 #include "include/free.h"
 
 void free_a_cube_input_layer(Network* network, int pos, int depth, int dim) {
     for (int i=0; i < depth; i++) {
         for (int j=0; j < dim; j++) {
-            free(network->input[pos][i][j]);
-            free(network->input_z[pos][i][j]);
+            gree(network->input[pos][i][j]);
+            gree(network->input_z[pos][i][j]);
         }
-        free(network->input[pos][i]);
-        free(network->input_z[pos][i]);
+        gree(network->input[pos][i]);
+        gree(network->input_z[pos][i]);
     }
-    free(network->input[pos]);
-    free(network->input_z[pos]);
+    gree(network->input[pos]);
+    gree(network->input_z[pos]);
 }
 
 void free_a_line_input_layer(Network* network, int pos) {
-    free(network->input[pos][0][0]);
-    free(network->input_z[pos][0][0]);
-    free(network->input[pos][0]);
-    free(network->input_z[pos][0]);
-    free(network->input[pos]);
-    free(network->input_z[pos]);
+    gree(network->input[pos][0][0]);
+    gree(network->input_z[pos][0][0]);
+    gree(network->input[pos][0]);
+    gree(network->input_z[pos][0]);
+    gree(network->input[pos]);
+    gree(network->input_z[pos]);
 }
 
 void free_2d_average_pooling(Network* network, int pos) {
@@ -38,31 +40,31 @@ void free_convolution(Network* network, int pos) {
     free_a_cube_input_layer(network, pos+1, network->depth[pos+1], network->width[pos+1]);
     for (int i=0; i < c; i++) {
         for (int j=0; j < bias_size; j++) {
-            free(k_pos->bias[i][j]);
-            free(k_pos->d_bias[i][j]);
+            gree(k_pos->bias[i][j]);
+            gree(k_pos->d_bias[i][j]);
         }
-        free(k_pos->bias[i]);
-        free(k_pos->d_bias[i]);
+        gree(k_pos->bias[i]);
+        gree(k_pos->d_bias[i]);
     }
-    free(k_pos->bias);
-    free(k_pos->d_bias);
+    gree(k_pos->bias);
+    gree(k_pos->d_bias);
 
     for (int i=0; i < r; i++) {
         for (int j=0; j < c; j++) {
             for (int k=0; k < k_size; k++) {
-                free(k_pos->w[i][j][k]);
-                free(k_pos->d_w[i][j][k]);
+                gree(k_pos->w[i][j][k]);
+                gree(k_pos->d_w[i][j][k]);
             }
-            free(k_pos->w[i][j]);
-            free(k_pos->d_w[i][j]);
+            gree(k_pos->w[i][j]);
+            gree(k_pos->d_w[i][j]);
        }
-        free(k_pos->w[i]);
-        free(k_pos->d_w[i]);
+        gree(k_pos->w[i]);
+        gree(k_pos->d_w[i]);
    }
-    free(k_pos->w);
-    free(k_pos->d_w);
+    gree(k_pos->w);
+    gree(k_pos->d_w);
 
-    free(k_pos);
+    gree(k_pos);
 }
 
 void free_dense(Network* network, int pos) {
@@ -70,16 +72,16 @@ void free_dense(Network* network, int pos) {
     Kernel_nn* k_pos = network->kernel[pos]->nn;
     int dim = k_pos->input_units;
     for (int i=0; i < dim; i++) {
-        free(k_pos->weights[i]);
-        free(k_pos->d_weights[i]);
+        gree(k_pos->weights[i]);
+        gree(k_pos->d_weights[i]);
     }
-    free(k_pos->weights);
-    free(k_pos->d_weights);
+    gree(k_pos->weights);
+    gree(k_pos->d_weights);
 
-    free(k_pos->bias);
-    free(k_pos->d_bias);
+    gree(k_pos->bias);
+    gree(k_pos->d_bias);
 
-    free(k_pos);
+    gree(k_pos);
 }
 
 void free_dense_linearisation(Network* network, int pos) {
@@ -87,29 +89,29 @@ void free_dense_linearisation(Network* network, int pos) {
     Kernel_nn* k_pos = network->kernel[pos]->nn;
     int dim = k_pos->input_units;
     for (int i=0; i < dim; i++) {
-        free(k_pos->weights[i]);
-        free(k_pos->d_weights[i]);
+        gree(k_pos->weights[i]);
+        gree(k_pos->d_weights[i]);
     }
-    free(k_pos->weights);
-    free(k_pos->d_weights);
+    gree(k_pos->weights);
+    gree(k_pos->d_weights);
 
-    free(k_pos->bias);
-    free(k_pos->d_bias);
+    gree(k_pos->bias);
+    gree(k_pos->d_bias);
 
-    free(k_pos);
+    gree(k_pos);
 }
 
 void free_network_creation(Network* network) {
     free_a_cube_input_layer(network, 0, network->depth[0], network->width[0]);
 
     for (int i=0; i < network->max_size-1; i++)
-        free(network->kernel[i]);
-    free(network->width);
-    free(network->depth);
-    free(network->kernel);
-    free(network->input);
-    free(network->input_z);
+        gree(network->kernel[i]);
+    gree(network->width);
+    gree(network->depth);
+    gree(network->kernel);
+    gree(network->input);
+    gree(network->input_z);
 
-    free(network);
+    gree(network);
 }
 
 void free_network(Network* network) {
diff --git a/src/cnn/jpeg.c b/src/cnn/jpeg.c
index 1b53bcb..6df0f98 100644
--- a/src/cnn/jpeg.c
+++ b/src/cnn/jpeg.c
@@ -6,6 +6,7 @@
 #include <jpeglib.h>
 
 #include "include/jpeg.h"
+#include "../include/utils.h"
 #include "../include/colors.h"
 
 // How to load a JPEG using libjpeg: https://www.tspi.at/2020/03/20/libjpegexample.html
@@ -52,9 +53,9 @@ imgRawImage* loadJpegImageFile(char* lpFilename) {
     #endif
 
     dwBufferBytes = imgWidth * imgHeight * 3; /* We only read RGB, not A */
-    lpData = (unsigned char*)malloc(sizeof(unsigned char)*dwBufferBytes);
+    lpData = (unsigned char*)nalloc(sizeof(unsigned char)*dwBufferBytes);
 
-    lpNewImage = (imgRawImage*)malloc(sizeof(imgRawImage));
+    lpNewImage = (imgRawImage*)nalloc(sizeof(imgRawImage));
     lpNewImage->numComponents = numComponents;
     lpNewImage->width = imgWidth;
     lpNewImage->height = imgHeight;
@@ -74,7 +75,7 @@ imgRawImage* loadJpegImageFile(char* lpFilename) {
 }
 
 jpegDataset* loadJpegDataset(char* folderPath) {
-    jpegDataset* dataset = (jpegDataset*)malloc(sizeof(jpegDataset));
+    jpegDataset* dataset = (jpegDataset*)nalloc(sizeof(jpegDataset));
     imgRawImage* image;
 
     // We start by counting the number of images and categories
@@ -82,8 +83,8 @@ jpegDataset* loadJpegDataset(char* folderPath) {
     dataset->numImages = countFiles(folderPath);
 
     dataset->images = NULL;
-    dataset->labels = (unsigned int*)malloc(sizeof(unsigned int)*dataset->numImages);
-    dataset->fileNames = (char**)malloc(sizeof(char*)*dataset->numImages);
+    dataset->labels = (unsigned int*)nalloc(sizeof(unsigned int)*dataset->numImages);
+    dataset->fileNames = (char**)nalloc(sizeof(char*)*dataset->numImages);
 
     DIR* dirp;
     struct dirent* entry;
@@ -96,17 +97,17 @@ jpegDataset* loadJpegDataset(char* folderPath) {
         if (strcmp(entry->d_name, ".")&&strcmp(entry->d_name, "..")) {
             if (entry->d_type == DT_DIR) {
                 prev_index = index;
-                concatenated_path = malloc(strlen(folderPath)+strlen(entry->d_name)+2);
+                concatenated_path = nalloc(strlen(folderPath)+strlen(entry->d_name)+2);
                 sprintf(concatenated_path, "%s/%s", folderPath, entry->d_name);
                 addFilenamesToArray(concatenated_path, dataset->fileNames, &index);
                 for (int i=prev_index; i < index; i++) {
                     dataset->labels[i] = getLabel(entry->d_name);
                 }
-                free(concatenated_path);
+                gree(concatenated_path);
             }
         }
     }
-    dataset->images = (unsigned char**)malloc(sizeof(unsigned char*)*dataset->numImages);
+    dataset->images = (unsigned char**)nalloc(sizeof(unsigned char*)*dataset->numImages);
     for (int i=0; i < (int)dataset->numImages; i++) {
         dataset->images[i] = NULL;
         #ifdef STORE_IMAGES_TO_RAM
@@ -116,7 +117,7 @@ jpegDataset* loadJpegDataset(char* folderPath) {
         }
         image = loadJpegImageFile(dataset->fileNames[i]);
         dataset->images[i] = image->lpData;
-        free(image);
+        gree(image);
         #endif
     }
 
     #ifdef STORE_IMAGES_TO_RAM
@@ -129,8 +130,8 @@ jpegDataset* loadJpegDataset(char* folderPath) {
     dataset->height = image->height;
     dataset->numComponents = image->numComponents;
 
-    free(image->lpData);
-    free(image);
+    gree(image->lpData);
+    gree(image);
 
     closedir(dirp);
     return dataset;
@@ -184,7 +185,7 @@ void addFilenamesToArray(char* path, char** array, int* index) {
     dirp = opendir(path); /* There should be error handling after this */
     while ((entry = readdir(dirp)) != NULL) {
         if (entry->d_type == DT_REG) { /* If the entry is a regular file */
-            filename = (char*)malloc(strlen(path)+strlen(entry->d_name)+2);
+            filename = (char*)nalloc(strlen(path)+strlen(entry->d_name)+2);
             sprintf(filename, "%s/%s", path, entry->d_name);
             array[i] = filename;
             i++;
@@ -196,15 +197,15 @@ void addFilenamesToArray(char* path, char** array, int* index) {
 
 void free_dataset(jpegDataset* dataset) {
     for (int i=0; i < (int)dataset->numImages; i++) {
-        free(dataset->fileNames[i]);
+        gree(dataset->fileNames[i]);
         #ifdef STORE_IMAGES_TO_RAM
-        free(dataset->images[i]);
+        gree(dataset->images[i]);
         #endif
     }
-    free(dataset->fileNames);
-    free(dataset->labels);
-    free(dataset->images);
-    free(dataset);
+    gree(dataset->fileNames);
+    gree(dataset->labels);
+    gree(dataset->images);
+    gree(dataset);
 }
 
 unsigned int getLabel(char* string) {
diff --git a/src/cnn/matrix_multiplication.cu b/src/cnn/matrix_multiplication.cu
index a35a262..fba1104 100644
--- a/src/cnn/matrix_multiplication.cu
+++ b/src/cnn/matrix_multiplication.cu
@@ -9,7 +9,7 @@
 #define BLOCKSIZE_y 16
 
 #ifdef __CUDACC__
-__global__ void matrix_mul_kernel(float* Md, float* Nd, float* Pd, int n, int p, int q, size_t pitch_m, size_t pitch_n, size_t pitch_p) {
+__global__ void matrix_mul_kernel(float** Md, float** Nd, float** Pd, int n, int p, int q) {
     // Chaque thread calcule toutes les multiplications utilisant l'élément Nd[tx][ty]
     int tx = (blockIdx.x*blockDim.x) + threadIdx.x; // Indice de colonne
     int ty = (blockIdx.y*blockDim.y) + threadIdx.y; // Indice de ligne
@@ -18,58 +18,19 @@ __global__ void matrix_mul_kernel(float** Md, float** Nd, float** Pd, int n, int p,
         return;
     }
 
-    // Pvalue stores the Pd element that is computed by the thread
-    float* M_offset;
-    float* P_offset;
-
-    float* N_offset = (float *)((char*)Nd + tx * pitch_n);
-    float Nxy = N_offset[ty]; // N[tx][ty]
-
     for (int i = 0; i < n; i++) {
-        M_offset = (float *)((char*)Md + i * pitch_m);
-        P_offset = (float*)((char*)Pd + i * pitch_p); // P[i], pitch_p est un décalage en bytes
-        atomicAdd(&P_offset[ty], M_offset[tx] * Nxy); // P[i][ty] += P[i][tx] * N[tx][ty]
+        atomicAdd(&(Pd[i][ty]), Md[i][tx]*Nd[tx][ty]);
+        // P[i][ty] += P[i][tx] * N[tx][ty]
     }
 }
 
 void matrix_multiplication_device(float** m1, float** m2, float** result, int n, int p, int q) {
-    // Préparation des matrices
-    size_t pitch_m1_dev;
-    size_t pitch_m2_dev;
-    size_t pitch_result_dev;
-    float* m1_dev;
-    float* m2_dev;
-    float* result_dev;
-
-    gpuErrchk( cudaMallocPitch((void**)&m1_dev, &pitch_m1_dev, p * sizeof(float), n));
-    for (int i=0; i < n; i++) {
-        gpuErrchk( cudaMemcpy((void*)((char*)m1_dev + i*pitch_m1_dev), (const void*)&(m1[i][0]), p*sizeof(float), cudaMemcpyHostToDevice));
-    }
-
-    gpuErrchk( cudaMallocPitch((void**)&m2_dev, &pitch_m2_dev, q * sizeof(float), p));
-    for (int i=0; i < p; i++) {
-        gpuErrchk( cudaMemcpy((void*)((char*)m2_dev + i*pitch_m2_dev), (const void*)&(m2[i][0]), q*sizeof(float), cudaMemcpyHostToDevice));
-    }
-
-    gpuErrchk( cudaMallocPitch((void**)&result_dev, &pitch_result_dev, q * sizeof(float), n));
-    gpuErrchk( cudaMemset(result_dev, 0, pitch_result_dev*n));
-
     // Traitement
     dim3 gridSize(i_div_up(p, BLOCKSIZE_x), i_div_up(q, BLOCKSIZE_y));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y);
 
-    matrix_mul_kernel<<<gridSize, blockSize>>>(m1_dev, m2_dev, result_dev, n, p, q, pitch_m1_dev, pitch_m2_dev, pitch_result_dev);
-    gpuErrchk( cudaPeekAtLastError() );
-    gpuErrchk( cudaDeviceSynchronize() );
-
-    // Post-traitement
-    for (int i=0; i < n; i++) {
-        gpuErrchk( cudaMemcpy((void*)&(result[i][0]), (const void*)((char*)result_dev + i*pitch_result_dev), sizeof(float)*q, cudaMemcpyDeviceToHost));
-    }
-
-    gpuErrchk( cudaFree(result_dev) );
-    gpuErrchk( cudaFree(m1_dev) );
-    gpuErrchk( cudaFree(m2_dev) );
+    matrix_mul_kernel<<<gridSize, blockSize>>>(m1, m2, result, n, p, q);
 
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }
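With the pitched staging gone, matrix_multiplication_device no longer cudaMemset()s a device copy of the result: the kernel accumulates into it with atomicAdd, so the caller has to hand over a zero-filled matrix. A usage sketch, assuming the nalloc-based helpers from the tests further down:

    // create_empty_matrix() zero-fills managed memory, which the
    // atomicAdd accumulation in matrix_mul_kernel relies on.
    float** m1 = create_matrix(n, p);
    float** m2 = create_matrix(p, q);
    float** result = create_empty_matrix(n, q);
    matrix_multiplication_device(m1, m2, result, n, p, q);
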
(float***)malloc(sizeof(float**)*network->depth[i]); + network->input[i] = (float***)nalloc(sizeof(float**)*network->depth[i]); for (int j=0; j < network->depth[i]; j++) { - network->input[i][j] = (float**)malloc(sizeof(float*)*network->width[i]); + network->input[i][j] = (float**)nalloc(sizeof(float*)*network->width[i]); for (int k=0; k < network->width[i]; k++) { - network->input[i][j][k] = (float*)malloc(sizeof(float)*network->width[i]); + network->input[i][j][k] = (float*)nalloc(sizeof(float)*network->width[i]); for (int l=0; l < network->width[i]; l++) { network->input[i][j][k][l] = 0.; } @@ -195,13 +196,13 @@ Network* read_network(char* filename) { } } - network->input_z = (float****)malloc(sizeof(float***)*size); + network->input_z = (float****)nalloc(sizeof(float***)*size); for (int i=0; i < (int)size; i++) { // input[size][couche->depth][couche->dim][couche->dim] - network->input_z[i] = (float***)malloc(sizeof(float**)*network->depth[i]); + network->input_z[i] = (float***)nalloc(sizeof(float**)*network->depth[i]); for (int j=0; j < network->depth[i]; j++) { - network->input_z[i][j] = (float**)malloc(sizeof(float*)*network->width[i]); + network->input_z[i][j] = (float**)nalloc(sizeof(float*)*network->width[i]); for (int k=0; k < network->width[i]; k++) { - network->input_z[i][j][k] = (float*)malloc(sizeof(float)*network->width[i]); + network->input_z[i][j][k] = (float*)nalloc(sizeof(float)*network->width[i]); for (int l=0; l < network->width[i]; l++) { network->input_z[i][j][k][l] = 0.; } @@ -214,10 +215,10 @@ Network* read_network(char* filename) { } Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) { - Kernel* kernel = (Kernel*)malloc(sizeof(Kernel)); + Kernel* kernel = (Kernel*)nalloc(sizeof(Kernel)); if (type_couche == 0) { // Cas du CNN // Lecture du "Pré-corps" - kernel->cnn = (Kernel_cnn*)malloc(sizeof(Kernel_cnn)); + kernel->cnn = (Kernel_cnn*)nalloc(sizeof(Kernel_cnn)); kernel->nn = NULL; uint32_t buffer[5]; fread(&buffer, sizeof(buffer), 1, ptr); @@ -232,14 +233,14 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) { Kernel_cnn* cnn = kernel->cnn; float tmp; - cnn->bias = (float***)malloc(sizeof(float**)*cnn->columns); - cnn->d_bias = (float***)malloc(sizeof(float**)*cnn->columns); + cnn->bias = (float***)nalloc(sizeof(float**)*cnn->columns); + cnn->d_bias = (float***)nalloc(sizeof(float**)*cnn->columns); for (int i=0; i < cnn->columns; i++) { - cnn->bias[i] = (float**)malloc(sizeof(float*)*output_dim); - cnn->d_bias[i] = (float**)malloc(sizeof(float*)*output_dim); + cnn->bias[i] = (float**)nalloc(sizeof(float*)*output_dim); + cnn->d_bias[i] = (float**)nalloc(sizeof(float*)*output_dim); for (int j=0; j < output_dim; j++) { - cnn->bias[i][j] = (float*)malloc(sizeof(float)*output_dim); - cnn->d_bias[i][j] = (float*)malloc(sizeof(float)*output_dim); + cnn->bias[i][j] = (float*)nalloc(sizeof(float)*output_dim); + cnn->d_bias[i][j] = (float*)nalloc(sizeof(float)*output_dim); for (int k=0; k < output_dim; k++) { fread(&tmp, sizeof(tmp), 1, ptr); cnn->bias[i][j][k] = tmp; @@ -248,17 +249,17 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) { } } - cnn->w = (float****)malloc(sizeof(float***)*cnn->rows); - cnn->d_w = (float****)malloc(sizeof(float***)*cnn->rows); + cnn->w = (float****)nalloc(sizeof(float***)*cnn->rows); + cnn->d_w = (float****)nalloc(sizeof(float***)*cnn->rows); for (int i=0; i < cnn->rows; i++) { - cnn->w[i] = (float***)malloc(sizeof(float**)*cnn->columns); - cnn->d_w[i] = 
(float***)malloc(sizeof(float**)*cnn->columns); + cnn->w[i] = (float***)nalloc(sizeof(float**)*cnn->columns); + cnn->d_w[i] = (float***)nalloc(sizeof(float**)*cnn->columns); for (int j=0; j < cnn->columns; j++) { - cnn->w[i][j] = (float**)malloc(sizeof(float*)*cnn->k_size); - cnn->d_w[i][j] = (float**)malloc(sizeof(float*)*cnn->k_size); + cnn->w[i][j] = (float**)nalloc(sizeof(float*)*cnn->k_size); + cnn->d_w[i][j] = (float**)nalloc(sizeof(float*)*cnn->k_size); for (int k=0; k < cnn->k_size; k++) { - cnn->w[i][j][k] = (float*)malloc(sizeof(float)*cnn->k_size); - cnn->d_w[i][j][k] = (float*)malloc(sizeof(float)*cnn->k_size); + cnn->w[i][j][k] = (float*)nalloc(sizeof(float)*cnn->k_size); + cnn->d_w[i][j][k] = (float*)nalloc(sizeof(float)*cnn->k_size); for (int l=0; l < cnn->k_size; l++) { fread(&tmp, sizeof(tmp), 1, ptr); cnn->w[i][j][k][l] = tmp; @@ -269,7 +270,7 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) { } } else if (type_couche == 1) { // Cas du NN // Lecture du "Pré-corps" - kernel->nn = (Kernel_nn*)malloc(sizeof(Kernel_nn)); + kernel->nn = (Kernel_nn*)nalloc(sizeof(Kernel_nn)); kernel->cnn = NULL; uint32_t buffer[4]; fread(&buffer, sizeof(buffer), 1, ptr); @@ -283,19 +284,19 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) { Kernel_nn* nn = kernel->nn; float tmp; - nn->bias = (float*)malloc(sizeof(float)*nn->output_units); - nn->d_bias = (float*)malloc(sizeof(float)*nn->output_units); + nn->bias = (float*)nalloc(sizeof(float)*nn->output_units); + nn->d_bias = (float*)nalloc(sizeof(float)*nn->output_units); for (int i=0; i < nn->output_units; i++) { fread(&tmp, sizeof(tmp), 1, ptr); nn->bias[i] = tmp; nn->d_bias[i] = 0.; } - nn->weights = (float**)malloc(sizeof(float*)*nn->input_units); - nn->d_weights = (float**)malloc(sizeof(float*)*nn->input_units); + nn->weights = (float**)nalloc(sizeof(float*)*nn->input_units); + nn->d_weights = (float**)nalloc(sizeof(float*)*nn->input_units); for (int i=0; i < nn->input_units; i++) { - nn->weights[i] = (float*)malloc(sizeof(float)*nn->output_units); - nn->d_weights[i] = (float*)malloc(sizeof(float)*nn->output_units); + nn->weights[i] = (float*)nalloc(sizeof(float)*nn->output_units); + nn->d_weights[i] = (float*)nalloc(sizeof(float)*nn->output_units); for (int j=0; j < nn->output_units; j++) { fread(&tmp, sizeof(tmp), 1, ptr); nn->weights[i][j] = tmp; diff --git a/src/cnn/preview.c b/src/cnn/preview.c index 9875dcd..2256eae 100644 --- a/src/cnn/preview.c +++ b/src/cnn/preview.c @@ -3,6 +3,8 @@ #include #include +#include "../include/utils.h" + #include "include/jpeg.h" @@ -36,11 +38,11 @@ void preview_images(char* path, int limit) { if (!dataset->images[i]) { image = loadJpegImageFile(dataset->fileNames[i]); dataset->images[i] = image->lpData; - free(image); + gree(image); } print_image(dataset->images[i], dataset->height, dataset->width); - free(dataset->images[i]); + gree(dataset->images[i]); } } diff --git a/src/cnn/test_network.c b/src/cnn/test_network.c index bd53641..8987797 100644 --- a/src/cnn/test_network.c +++ b/src/cnn/test_network.c @@ -5,6 +5,7 @@ #include "../mnist/include/mnist.h" #include "include/neuron_io.h" +#include "../include/utils.h" #include "include/struct.h" #include "include/jpeg.h" #include "include/free.h" @@ -29,7 +30,7 @@ void test_network_mnist(Network* network, char* images_file, char* labels_file, width = mnist_parameters[1]; height = mnist_parameters[2]; - free(mnist_parameters); + gree(mnist_parameters); // Load image in the first layer of the Network for (int i=0; i < 
nb_elem; i++) { @@ -46,11 +47,11 @@ void test_network_mnist(Network* network, char* images_file, char* labels_file, } for (int j=0; j < height; j++) { - free(images[i][j]); + gree(images[i][j]); } - free(images[i]); + gree(images[i]); } - free(images); + gree(images); printf("%d Images. Taux de réussite: %.2f%%\n", nb_elem, 100*accuracy/(float)nb_elem); } @@ -75,13 +76,13 @@ void test_network_jpg(Network* network, char* data_dir, bool preview_fails) { accuracy++; } - free(dataset->images[i]); + gree(dataset->images[i]); } printf("%d Images. Taux de réussite: %.2f%%\n", dataset->numImages, 100*accuracy/(float)dataset->numImages); - free(dataset->images); - free(dataset->labels); - free(dataset); + gree(dataset->images); + gree(dataset->labels); + gree(dataset); } @@ -109,7 +110,7 @@ void recognize_mnist(Network* network, char* input_file, char* out) { width = mnist_parameters[1]; height = mnist_parameters[2]; - free(mnist_parameters); + gree(mnist_parameters); if (! strcmp(out, "json")) { printf("{\n"); @@ -147,15 +148,15 @@ void recognize_mnist(Network* network, char* input_file, char* out) { } for (int j=0; j < height; j++) { - free(images[i][j]); + gree(images[i][j]); } - free(images[i]); + gree(images[i]); } if (! strcmp(out, "json")) { printf("}\n"); } - free(images); + gree(images); } void recognize_jpg(Network* network, char* input_file, char* out) { @@ -194,8 +195,8 @@ void recognize_jpg(Network* network, char* input_file, char* out) { printf("}\n"); } - free(image->lpData); - free(image); + gree(image->lpData); + gree(image); } void recognize(int dataset_type, char* modele, char* input_file, char* out) { diff --git a/src/cnn/train.c b/src/cnn/train.c index 4835f06..df72b45 100644 --- a/src/cnn/train.c +++ b/src/cnn/train.c @@ -11,6 +11,7 @@ #include "include/initialisation.h" #include "include/neuron_io.h" #include "../include/colors.h" +#include "../include/utils.h" #include "include/function.h" #include "include/creation.h" #include "include/update.h" @@ -70,7 +71,7 @@ void* train_thread(void* parameters) { if (!param->dataset->images[index[i]]) { image = loadJpegImageFile(param->dataset->fileNames[index[i]]); param->dataset->images[index[i]] = image->lpData; - free(image); + gree(image); } write_image_in_network_260(param->dataset->images[index[i]], height, width, network->input[0]); forward_propagation(network); @@ -81,7 +82,7 @@ void* train_thread(void* parameters) { accuracy += 1.; } - free(param->dataset->images[index[i]]); + gree(param->dataset->images[index[i]]); param->dataset->images[index[i]] = NULL; } } @@ -123,7 +124,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di // Chargement des images du set de données MNIST int* parameters = read_mnist_images_parameters(images_file); nb_images_total = parameters[0]; - free(parameters); + gree(parameters); images = read_mnist_images(images_file); labels = read_mnist_labels(labels_file); @@ -191,7 +192,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di // thread dans l'hypothèse ou le multi-threading n'est pas utilisé. // Cela est utile à des fins de débogage notamment, // où l'utilisation de threads rend vite les choses plus compliquées qu'elles ne le sont. 
- TrainParameters* train_params = (TrainParameters*)malloc(sizeof(TrainParameters)); + TrainParameters* train_params = (TrainParameters*)nalloc(sizeof(TrainParameters)); train_params->network = network; train_params->dataset_type = dataset_type; @@ -283,7 +284,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di } } current_accuracy = accuracy * nb_images_total/((j+1)*BATCHES); - printf("\rThreads [%d]\tÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: "YELLOW"%0.2f%%"RESET, nb_threads, i, epochs, BATCHES*(j+1), nb_images_total, current_accuracy*100); + printf("\rThreads [%d]\tÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: " YELLOW "%0.2f%%" RESET, nb_threads, i, epochs, BATCHES*(j+1), nb_images_total, current_accuracy*100); fflush(stdout); #else (void)nb_images_total_remaining; // Juste pour enlever un warning @@ -315,7 +316,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di end_time = omp_get_wtime(); elapsed_time = end_time - start_time; #ifdef USE_MULTITHREADING - printf("\rThreads [%d]\tÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: "GREEN"%0.4f%%"RESET" \tTemps: %0.2f s\n", nb_threads, i, epochs, nb_images_total, nb_images_total, accuracy*100, elapsed_time); + printf("\rThreads [%d]\tÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: " GREEN "%0.4f%%" RESET " \tTemps: %0.2f s\n", nb_threads, i, epochs, nb_images_total, nb_images_total, accuracy*100, elapsed_time); #else printf("\rÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: "GREEN"%0.4f%%"RESET" \tTemps: %0.2f s\n", i, epochs, nb_images_total, nb_images_total, accuracy*100, elapsed_time); #endif @@ -332,7 +333,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di #ifdef USE_MULTITHREADING free(tid); for (int i=0; i < nb_threads; i++) { - free(train_parameters[i]->network); + free_network(train_parameters[i]->network); } free(train_parameters); #else @@ -342,12 +343,12 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di if (dataset_type == 0) { for (int i=0; i < nb_images_total; i++) { for (int j=0; j < 28; j++) { - free(images[i][j]); + gree(images[i][j]); } - free(images[i]); + gree(images[i]); } - free(images); - free(labels); + gree(images); + gree(labels); } else { free_dataset(dataset); } diff --git a/src/cnn/utils.c b/src/cnn/utils.c index f5b2bdd..30c3410 100644 --- a/src/cnn/utils.c +++ b/src/cnn/utils.c @@ -4,6 +4,7 @@ #include #include "../include/colors.h" +#include "../include/utils.h" #include "include/struct.h" #define copyVar(var) network_cp->var = network->var @@ -93,7 +94,7 @@ bool equals_networks(Network* network1, Network* network2) { Network* copy_network(Network* network) { - Network* network_cp = (Network*)malloc(sizeof(Network)); + Network* network_cp = (Network*)nalloc(sizeof(Network)); // Paramètre du réseau int size = network->size; // Paramètres des couches NN @@ -111,17 +112,17 @@ Network* copy_network(Network* network) { copyVar(max_size); copyVar(size); - network_cp->width = (int*)malloc(sizeof(int)*size); - network_cp->depth = (int*)malloc(sizeof(int)*size); + network_cp->width = (int*)nalloc(sizeof(int)*size); + network_cp->depth = (int*)nalloc(sizeof(int)*size); for (int i=0; i < size; i++) { copyVar(width[i]); copyVar(depth[i]); } - network_cp->kernel = (Kernel**)malloc(sizeof(Kernel*)*(size-1)); + network_cp->kernel = (Kernel**)nalloc(sizeof(Kernel*)*(size-1)); for (int i=0; i < size-1; i++) { - network_cp->kernel[i] = (Kernel*)malloc(sizeof(Kernel)); + network_cp->kernel[i] = 
(Kernel*)nalloc(sizeof(Kernel)); if (!network->kernel[i]->nn && !network->kernel[i]->cnn) { // Cas de la couche de linéarisation copyVar(kernel[i]->activation); copyVar(kernel[i]->linearisation); // 1 @@ -136,23 +137,23 @@ Network* copy_network(Network* network) { output_units = network->kernel[i]->nn->output_units; network_cp->kernel[i]->cnn = NULL; - network_cp->kernel[i]->nn = (Kernel_nn*)malloc(sizeof(Kernel_nn)); + network_cp->kernel[i]->nn = (Kernel_nn*)nalloc(sizeof(Kernel_nn)); copyVar(kernel[i]->nn->input_units); copyVar(kernel[i]->nn->output_units); - network_cp->kernel[i]->nn->bias = (float*)malloc(sizeof(float)*output_units); - network_cp->kernel[i]->nn->d_bias = (float*)malloc(sizeof(float)*output_units); + network_cp->kernel[i]->nn->bias = (float*)nalloc(sizeof(float)*output_units); + network_cp->kernel[i]->nn->d_bias = (float*)nalloc(sizeof(float)*output_units); for (int j=0; j < output_units; j++) { copyVar(kernel[i]->nn->bias[j]); network_cp->kernel[i]->nn->d_bias[j] = 0.; } - network_cp->kernel[i]->nn->weights = (float**)malloc(sizeof(float*)*input_units); - network_cp->kernel[i]->nn->d_weights = (float**)malloc(sizeof(float*)*input_units); + network_cp->kernel[i]->nn->weights = (float**)nalloc(sizeof(float*)*input_units); + network_cp->kernel[i]->nn->d_weights = (float**)nalloc(sizeof(float*)*input_units); for (int j=0; j < input_units; j++) { - network_cp->kernel[i]->nn->weights[j] = (float*)malloc(sizeof(float)*output_units); - network_cp->kernel[i]->nn->d_weights[j] = (float*)malloc(sizeof(float)*output_units); + network_cp->kernel[i]->nn->weights[j] = (float*)nalloc(sizeof(float)*output_units); + network_cp->kernel[i]->nn->d_weights[j] = (float*)nalloc(sizeof(float)*output_units); for (int k=0; k < output_units; k++) { copyVar(kernel[i]->nn->weights[j][k]); network_cp->kernel[i]->nn->d_weights[j][k] = 0.; @@ -170,20 +171,20 @@ Network* copy_network(Network* network) { network_cp->kernel[i]->nn = NULL; - network_cp->kernel[i]->cnn = (Kernel_cnn*)malloc(sizeof(Kernel_cnn)); + network_cp->kernel[i]->cnn = (Kernel_cnn*)nalloc(sizeof(Kernel_cnn)); copyVar(kernel[i]->cnn->rows); copyVar(kernel[i]->cnn->k_size); copyVar(kernel[i]->cnn->columns); - network_cp->kernel[i]->cnn->bias = (float***)malloc(sizeof(float**)*columns); - network_cp->kernel[i]->cnn->d_bias = (float***)malloc(sizeof(float**)*columns); + network_cp->kernel[i]->cnn->bias = (float***)nalloc(sizeof(float**)*columns); + network_cp->kernel[i]->cnn->d_bias = (float***)nalloc(sizeof(float**)*columns); for (int j=0; j < columns; j++) { - network_cp->kernel[i]->cnn->bias[j] = (float**)malloc(sizeof(float*)*output_dim); - network_cp->kernel[i]->cnn->d_bias[j] = (float**)malloc(sizeof(float*)*output_dim); + network_cp->kernel[i]->cnn->bias[j] = (float**)nalloc(sizeof(float*)*output_dim); + network_cp->kernel[i]->cnn->d_bias[j] = (float**)nalloc(sizeof(float*)*output_dim); for (int k=0; k < output_dim; k++) { - network_cp->kernel[i]->cnn->bias[j][k] = (float*)malloc(sizeof(float)*output_dim); - network_cp->kernel[i]->cnn->d_bias[j][k] = (float*)malloc(sizeof(float)*output_dim); + network_cp->kernel[i]->cnn->bias[j][k] = (float*)nalloc(sizeof(float)*output_dim); + network_cp->kernel[i]->cnn->d_bias[j][k] = (float*)nalloc(sizeof(float)*output_dim); for (int l=0; l < output_dim; l++) { copyVar(kernel[i]->cnn->bias[j][k][l]); network_cp->kernel[i]->cnn->d_bias[j][k][l] = 0.; @@ -191,17 +192,17 @@ Network* copy_network(Network* network) { } } - network_cp->kernel[i]->cnn->w = (float****)malloc(sizeof(float***)*rows); - 
network_cp->kernel[i]->cnn->d_w = (float****)malloc(sizeof(float***)*rows); + network_cp->kernel[i]->cnn->w = (float****)nalloc(sizeof(float***)*rows); + network_cp->kernel[i]->cnn->d_w = (float****)nalloc(sizeof(float***)*rows); for (int j=0; j < rows; j++) { - network_cp->kernel[i]->cnn->w[j] = (float***)malloc(sizeof(float**)*columns); - network_cp->kernel[i]->cnn->d_w[j] = (float***)malloc(sizeof(float**)*columns); + network_cp->kernel[i]->cnn->w[j] = (float***)nalloc(sizeof(float**)*columns); + network_cp->kernel[i]->cnn->d_w[j] = (float***)nalloc(sizeof(float**)*columns); for (int k=0; k < columns; k++) { - network_cp->kernel[i]->cnn->w[j][k] = (float**)malloc(sizeof(float*)*k_size); - network_cp->kernel[i]->cnn->d_w[j][k] = (float**)malloc(sizeof(float*)*k_size); + network_cp->kernel[i]->cnn->w[j][k] = (float**)nalloc(sizeof(float*)*k_size); + network_cp->kernel[i]->cnn->d_w[j][k] = (float**)nalloc(sizeof(float*)*k_size); for (int l=0; l < k_size; l++) { - network_cp->kernel[i]->cnn->w[j][k][l] = (float*)malloc(sizeof(float)*k_size); - network_cp->kernel[i]->cnn->d_w[j][k][l] = (float*)malloc(sizeof(float)*k_size); + network_cp->kernel[i]->cnn->w[j][k][l] = (float*)nalloc(sizeof(float)*k_size); + network_cp->kernel[i]->cnn->d_w[j][k][l] = (float*)nalloc(sizeof(float)*k_size); for (int m=0; m < k_size; m++) { copyVar(kernel[i]->cnn->w[j][k][l][m]); network_cp->kernel[i]->cnn->d_w[j][k][l][m] = 0.; @@ -212,13 +213,13 @@ Network* copy_network(Network* network) { } } - network_cp->input = (float****)malloc(sizeof(float***)*size); + network_cp->input = (float****)nalloc(sizeof(float***)*size); for (int i=0; i < size; i++) { // input[size][couche->depth][couche->dim][couche->dim] - network_cp->input[i] = (float***)malloc(sizeof(float**)*network->depth[i]); + network_cp->input[i] = (float***)nalloc(sizeof(float**)*network->depth[i]); for (int j=0; j < network->depth[i]; j++) { - network_cp->input[i][j] = (float**)malloc(sizeof(float*)*network->width[i]); + network_cp->input[i][j] = (float**)nalloc(sizeof(float*)*network->width[i]); for (int k=0; k < network->width[i]; k++) { - network_cp->input[i][j][k] = (float*)malloc(sizeof(float)*network->width[i]); + network_cp->input[i][j][k] = (float*)nalloc(sizeof(float)*network->width[i]); for (int l=0; l < network->width[i]; l++) { network_cp->input[i][j][k][l] = 0.; } @@ -226,13 +227,13 @@ Network* copy_network(Network* network) { } } - network_cp->input_z = (float****)malloc(sizeof(float***)*size); + network_cp->input_z = (float****)nalloc(sizeof(float***)*size); for (int i=0; i < size; i++) { // input_z[size][couche->depth][couche->dim][couche->dim] - network_cp->input_z[i] = (float***)malloc(sizeof(float**)*network->depth[i]); + network_cp->input_z[i] = (float***)nalloc(sizeof(float**)*network->depth[i]); for (int j=0; j < network->depth[i]; j++) { - network_cp->input_z[i][j] = (float**)malloc(sizeof(float*)*network->width[i]); + network_cp->input_z[i][j] = (float**)nalloc(sizeof(float*)*network->width[i]); for (int k=0; k < network->width[i]; k++) { - network_cp->input_z[i][j][k] = (float*)malloc(sizeof(float)*network->width[i]); + network_cp->input_z[i][j][k] = (float*)nalloc(sizeof(float)*network->width[i]); for (int l=0; l < network->width[i]; l++) { network_cp->input_z[i][j][k][l] = 0.; } diff --git a/src/include/utils.h b/src/include/utils.h index 2f96a2d..77e5862 100644 --- a/src/include/utils.h +++ b/src/include/utils.h @@ -1,3 +1,9 @@ +#include +#include +#ifdef USE_CUDA + #include "cuda_runtime.h" +#endif + #ifndef DEF_UTILS_CU_H 
 #define DEF_UTILS_CU_H
@@ -22,4 +28,8 @@ int i_div_up(int a, int b);
 */
 bool check_cuda_compatibility();
+
+void* nalloc(size_t sz);
+
+void gree(void* ptr);
 #endif
\ No newline at end of file
diff --git a/src/utils.c b/src/utils.c
new file mode 100644
index 0000000..6f5c126
--- /dev/null
+++ b/src/utils.c
@@ -0,0 +1,66 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#ifdef USE_CUDA
+    #include "cuda_runtime.h"
+#endif
+#include "include/utils.h"
+#include "include/colors.h"
+
+
+int i_div_up(int a, int b) { // Ceiling of the integer division a/b
+    return ((a % b) != 0) ? (a / b + 1) : (a / b);
+}
+
+bool check_cuda_compatibility() {
+    #ifdef __CUDACC__
+    int nDevices;
+    cudaDeviceProp prop;
+
+    cudaGetDeviceCount(&nDevices);
+    if (nDevices == 0) {
+        printf("Pas d'utilisation du GPU\n\n");
+        return false;
+    }
+
+    printf("GPUs disponibles:\n");
+
+    for (int i=0; i < nDevices; i++) {
+        cudaGetDeviceProperties(&prop, i);
+        printf(" - %s\n", prop.name);
+    }
+
+    cudaGetDeviceProperties(&prop, 0);
+    printf("Utilisation du GPU: " BLUE "%s" RESET " (Compute capability: %d.%d)\n\n", prop.name, prop.major, prop.minor);
+    return true;
+    #else
+    printf("Pas d'utilisation du GPU\n\n");
+    return false;
+    #endif
+}
+
+#ifndef USE_CUDA
+
+void* nalloc(size_t sz) {
+    void* ptr = malloc(sz);
+    return ptr;
+}
+
+void gree(void* ptr) {
+    free(ptr);
+}
+
+#else
+
+void* nalloc(size_t sz) {
+    void* ptr;
+    cudaMallocManaged(&ptr, sz, cudaMemAttachHost);
+    return ptr;
+}
+
+void gree(void* ptr) {
+    cudaFree(ptr);
+}
+
+#endif
\ No newline at end of file
diff --git a/src/utils.cu b/src/utils.cu
index 8998d22..a8d4787 100644
--- a/src/utils.cu
+++ b/src/utils.cu
@@ -34,4 +34,29 @@ bool check_cuda_compatibility() {
     printf("Pas d'utilisation du GPU\n\n");
     return false;
     #endif
-}
\ No newline at end of file
+}
+
+#ifndef __CUDACC__
+
+void* nalloc(size_t sz) {
+    void* ptr = malloc(sz);
+    return ptr;
+}
+
+void gree(void* ptr) {
+    free(ptr);
+}
+
+#else
+
+void* nalloc(size_t sz) {
+    void* ptr;
+    cudaMallocManaged(&ptr, sz, cudaMemAttachHost);
+    return ptr;
+}
+
+void gree(void* ptr) {
+    cudaFree(ptr);
+}
+
+#endif
diff --git a/test/cnn_convolution.cu b/test/cnn_convolution.cu
index 4e4779d..4bfe4db 100644
--- a/test/cnn_convolution.cu
+++ b/test/cnn_convolution.cu
@@ -41,11 +41,11 @@ void print_matrix(float** mat, int n, int p) {
 
 float*** create_matrix(int n, int p, int q, float max_val) {
-    float*** matrix = (float***)malloc(n*sizeof(float**));
+    float*** matrix = (float***)nalloc(n*sizeof(float**));
     for (int i=0; i < n; i++) {
-        matrix[i] = (float**)malloc(sizeof(float*)*p);
+        matrix[i] = (float**)nalloc(sizeof(float*)*p);
         for (int j=0; j < p; j++) {
-            matrix[i][j] = (float*)malloc(sizeof(float)*q);
+            matrix[i][j] = (float*)nalloc(sizeof(float)*q);
         }
     }
 
@@ -55,11 +55,11 @@ float*** create_matrix(int n, int p, int q, float max_val) {
 
 float*** create_empty_matrix(int n, int p, int q) {
-    float*** matrix = (float***)malloc(n*sizeof(float**));
+    float*** matrix = (float***)nalloc(n*sizeof(float**));
     for (int i=0; i < n; i++) {
-        matrix[i] = (float**)malloc(sizeof(float*)*p);
+        matrix[i] = (float**)nalloc(sizeof(float*)*p);
         for (int j=0; j < p; j++) {
-            matrix[i][j] = (float*)malloc(sizeof(float)*q);
+            matrix[i][j] = (float*)nalloc(sizeof(float)*q);
             for (int k=0; k < q; k++) {
                 matrix[i][j][k] = 0.;
             }
@@ -71,11 +71,11 @@ float*** create_empty_matrix(int n, int p, int q) {
 void free_matrix(float*** matrix, int n, int p) {
     for (int i=0; i < n; i++) {
         for (int j=0; j < p; j++) {
-            free(matrix[i][j]);
+            gree(matrix[i][j]);
         }
-        free(matrix[i]);
+        gree(matrix[i]);
     }
-    free(matrix);
+    gree(matrix);
 }
 
 bool check_matrices_equality(float*** m1, float*** m2, int n, int p, int q, int acceptation) {
@@ -97,7 +97,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
     int k_size = input_dim - output_dim +1;
 
     // Generate the random data
-    Kernel_cnn* kernel = (Kernel_cnn*)malloc(sizeof(Kernel_cnn));
+    Kernel_cnn* kernel = (Kernel_cnn*)nalloc(sizeof(Kernel_cnn));
 
     kernel->k_size = k_size;
     kernel->rows = rows;
@@ -108,8 +108,8 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
     kernel->d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f);
 
     // w[rows][columns][k_size][k_size]
-    kernel->w = (float****)malloc(sizeof(float***)*kernel->rows);
-    kernel->d_w = (float****)malloc(sizeof(float***)*kernel->rows);
+    kernel->w = (float****)nalloc(sizeof(float***)*kernel->rows);
+    kernel->d_w = (float****)nalloc(sizeof(float***)*kernel->rows);
     for (int i=0; i < kernel->rows; i++) {
         kernel->w[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 15.0f);
         kernel->d_w[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
@@ -156,8 +156,8 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
         free_matrix(kernel->w[i], kernel->columns, kernel->k_size);
         free_matrix(kernel->d_w[i], kernel->columns, kernel->k_size);
     }
-    free(kernel->w);
-    free(kernel->d_w);
+    gree(kernel->w);
+    gree(kernel->d_w);
 
     free_matrix(input, kernel->rows, input_dim);
     free_matrix(output_cpu, kernel->columns, output_dim);
diff --git a/test/cnn_matrix_multiplication.cu b/test/cnn_matrix_multiplication.cu
index 69f1ce7..cd322b8 100644
--- a/test/cnn_matrix_multiplication.cu
+++ b/test/cnn_matrix_multiplication.cu
@@ -37,9 +37,9 @@ void print_matrix(float** mat, int n, int p) {
 
 float** create_matrix(int n, int p) {
-    float** matrix = (float**)malloc(n*sizeof(float*));
+    float** matrix = (float**)nalloc(n*sizeof(float*));
     for (int i=0; i < n; i++) {
-        matrix[i] = (float*)malloc(sizeof(float)*p);
+        matrix[i] = (float*)nalloc(sizeof(float)*p);
     }
 
     fill_matrix_random(matrix, n, p);
@@ -48,9 +48,9 @@ float** create_matrix(int n, int p) {
 
 float** create_empty_matrix(int n, int p) {
-    float** matrix = (float**)malloc(n*sizeof(float*));
+    float** matrix = (float**)nalloc(n*sizeof(float*));
     for (int i=0; i < n; i++) {
-        matrix[i] = (float*)malloc(p*sizeof(float));
+        matrix[i] = (float*)nalloc(p*sizeof(float));
         for (int j=0; j < p; j++) {
            matrix[i][j] = 0.;
         }
@@ -103,24 +103,24 @@ void run_matrices_test(int n, int p, int q) {
 
     // Free the allocated memory
     for (int i=0; i < n; i++) {
-        free(matrix1[i]);
+        gree(matrix1[i]);
     }
-    free(matrix1);
+    gree(matrix1);
     for (int i=0; i < p; i++) {
-        free(matrix2[i]);
+        gree(matrix2[i]);
     }
-    free(matrix2);
     for (int i=0; i < n; i++) {
-        free(result_cpu[i]);
+        gree(result_cpu[i]);
     }
-    free(result_cpu);
+    gree(result_cpu);
     for (int i=0; i < n; i++) {
-        free(result_gpu[i]);
+        gree(result_gpu[i]);
    }
-    free(result_gpu);
+    gree(result_gpu);
 }
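Editorial note (not part of the patch): nalloc/gree wrap malloc/free on CPU-only builds; with -DUSE_CUDA they switch to cudaMallocManaged/cudaFree, so every buffer lives in CUDA unified memory and the deeply nested float**/float**** structures above stay dereferenceable from both host code and GPU kernels, with no explicit cudaMemcpy. The cudaMemAttachHost flag keeps a fresh allocation host-resident until device code actually uses it. The one rule this imposes is visible throughout the diff: anything obtained from nalloc must be released with gree, never free, because calling free on a cudaMallocManaged pointer is invalid. A minimal usage sketch under these assumptions (the main function and the size n are illustrative only, not code from the repository):

    #include <stdio.h>
    #include "include/utils.h"

    int main() {
        int n = 8;
        // nalloc: plain malloc without USE_CUDA, cudaMallocManaged with it;
        // either way the returned pointer is directly usable from host code.
        float* buf = (float*)nalloc(sizeof(float)*n);
        for (int i=0; i < n; i++) {
            buf[i] = (float)i;
        }
        // With USE_CUDA this same pointer could also be handed to a kernel,
        // since managed memory is shared between host and device.
        printf("%f\n", buf[n-1]);
        gree(buf); // never free(): the pointer may come from cudaMallocManaged
        return 0;
    }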