Mirror of https://github.com/augustin64/projet-tipe (synced 2025-01-23 15:16:26 +01:00)
Implement custom memory management
This commit is contained in:
parent fe880f9aae
commit 64e1cf0ad5

Makefile: 58 changed lines
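Every hunk below swaps the standard malloc/calloc/free calls for a custom pair, nalloc/gree, declared in the shared header ../include/utils.h. The allocator's own implementation is not among the captured hunks, so the following is only a plausible sketch of the interface, assuming (not confirmed by this page) that plain builds fall back to malloc/free while CUDA builds use unified (managed) memory, so that host-built pointer structures stay dereferenceable inside GPU kernels:

    /* Hypothetical sketch of the nalloc/gree pair; NOT the code of this commit. */
    #include <stdlib.h>

    void* nalloc(size_t size) {
    #ifdef __CUDACC__
        void* ptr;
        cudaMallocManaged(&ptr, size, cudaMemAttachGlobal); /* visible to CPU and GPU */
        return ptr;
    #else
        return malloc(size); /* plain CPU build */
    #endif
    }

    void gree(void* ptr) {
    #ifdef __CUDACC__
        cudaFree(ptr); /* managed memory goes back through the CUDA runtime */
    #else
        free(ptr);
    #endif
    }

The convolution and matrix-multiplication hunks below only make sense under some such scheme: they pass host-side float*** arrays straight to __global__ kernels.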
@@ -58,6 +58,9 @@ $(BUILDDIR)/mnist-preview: $(MNIST_SRCDIR)/preview.c $(BUILDDIR)/mnist.o
 $(BUILDDIR)/mnist.o: $(MNIST_SRCDIR)/mnist.c $(MNIST_SRCDIR)/include/mnist.h
 	$(CC) -c $< -o $@ $(CFLAGS)
 
+$(BUILDDIR)/mnist.cuda.o: $(MNIST_SRCDIR)/mnist.c $(MNIST_SRCDIR)/include/mnist.h
+	$(CC) -c $< -o $@ $(CFLAGS) -DUSE_CUDA -lcuda -I/opt/cuda/include
+
 $(BUILDDIR)/mnist_%.o: $(MNIST_SRCDIR)/%.c $(MNIST_SRCDIR)/include/%.h
 	$(CC) -c $< -o $@ $(CFLAGS)
 
@@ -67,22 +70,60 @@ $(BUILDDIR)/mnist_%.o: $(MNIST_SRCDIR)/%.c $(MNIST_SRCDIR)/include/%.h
 #
 cnn: $(BUILDDIR)/cnn-main $(BUILDDIR)/cnn-main-cuda $(BUILDDIR)/cnn-preview;
 
-$(BUILDDIR)/cnn-main: $(CNN_SRCDIR)/main.c $(BUILDDIR)/cnn_train.o $(BUILDDIR)/cnn_test_network.o $(BUILDDIR)/cnn_cnn.o $(BUILDDIR)/cnn_creation.o $(BUILDDIR)/cnn_initialisation.o $(BUILDDIR)/cnn_make.o $(BUILDDIR)/cnn_neuron_io.o $(BUILDDIR)/cnn_function.o $(BUILDDIR)/cnn_utils.o $(BUILDDIR)/cnn_update.o $(BUILDDIR)/cnn_free.o $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/cnn_convolution.o $(BUILDDIR)/cnn_backpropagation.o $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o
+$(BUILDDIR)/cnn-main: $(CNN_SRCDIR)/main.c \
+	$(BUILDDIR)/cnn_train.o \
+	$(BUILDDIR)/cnn_test_network.o \
+	$(BUILDDIR)/cnn_cnn.o \
+	$(BUILDDIR)/cnn_creation.o \
+	$(BUILDDIR)/cnn_initialisation.o \
+	$(BUILDDIR)/cnn_make.o \
+	$(BUILDDIR)/cnn_neuron_io.o \
+	$(BUILDDIR)/cnn_function.o \
+	$(BUILDDIR)/cnn_utils.o \
+	$(BUILDDIR)/cnn_update.o \
+	$(BUILDDIR)/cnn_free.o \
+	$(BUILDDIR)/cnn_jpeg.o \
+	$(BUILDDIR)/cnn_convolution.o \
+	$(BUILDDIR)/cnn_backpropagation.o \
+	$(BUILDDIR)/colors.o \
+	$(BUILDDIR)/mnist.o \
+	$(BUILDDIR)/utils.o
 	$(CC) $^ -o $@ $(CFLAGS)
 
-$(BUILDDIR)/cnn-main-cuda: $(BUILDDIR)/cnn_main.o $(BUILDDIR)/cnn_train.o $(BUILDDIR)/cnn_test_network.o $(BUILDDIR)/cnn_cnn.o $(BUILDDIR)/cnn_creation.o $(BUILDDIR)/cnn_initialisation.o $(BUILDDIR)/cnn_make.o $(BUILDDIR)/cnn_neuron_io.o $(BUILDDIR)/cnn_function.o $(BUILDDIR)/cnn_utils.o $(BUILDDIR)/cnn_update.o $(BUILDDIR)/cnn_free.o $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/cnn_cuda_convolution.o $(BUILDDIR)/cnn_backpropagation.o $(BUILDDIR)/cuda_utils.o $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o
+$(BUILDDIR)/cnn-main-cuda: $(BUILDDIR)/cnn_main.cuda.o \
+	$(BUILDDIR)/cnn_train.cuda.o \
+	$(BUILDDIR)/cnn_test_network.cuda.o \
+	$(BUILDDIR)/cnn_cnn.cuda.o \
+	$(BUILDDIR)/cnn_creation.cuda.o \
+	$(BUILDDIR)/cnn_initialisation.cuda.o \
+	$(BUILDDIR)/cnn_make.cuda.o \
+	$(BUILDDIR)/cnn_neuron_io.cuda.o \
+	$(BUILDDIR)/cnn_function.cuda.o \
+	$(BUILDDIR)/cnn_utils.cuda.o \
+	$(BUILDDIR)/cnn_update.cuda.o \
+	$(BUILDDIR)/cnn_free.cuda.o \
+	$(BUILDDIR)/cnn_jpeg.cuda.o \
+	$(BUILDDIR)/cnn_cuda_convolution.o \
+	$(BUILDDIR)/cnn_backpropagation.cuda.o \
+	$(BUILDDIR)/colors.cuda.o \
+	$(BUILDDIR)/mnist.cuda.o \
+	$(BUILDDIR)/utils.cuda.o \
+	$(BUILDDIR)/cuda_utils.o
 ifndef NVCC_INSTALLED
 	@echo "$(NVCC) not found, skipping"
 else
 	$(NVCC) $(NVCCFLAGS) $^ -o $@
 endif
 
-$(BUILDDIR)/cnn-preview: $(CNN_SRCDIR)/preview.c $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/colors.o
+$(BUILDDIR)/cnn-preview: $(CNN_SRCDIR)/preview.c $(BUILDDIR)/cnn_jpeg.o $(BUILDDIR)/colors.o $(BUILDDIR)/utils.o
 	$(CC) $^ -o $@ $(CFLAGS)
 
 $(BUILDDIR)/cnn_%.o: $(CNN_SRCDIR)/%.c $(CNN_SRCDIR)/include/%.h
 	$(CC) -c $< -o $@ $(CFLAGS)
 
+$(BUILDDIR)/cnn_%.cuda.o: $(CNN_SRCDIR)/%.c $(CNN_SRCDIR)/include/%.h
+	$(CC) -c $< -o $@ $(CFLAGS) -DUSE_CUDA -lcuda -I/opt/cuda/include
+
 $(BUILDDIR)/cnn_cuda_%.o: $(CNN_SRCDIR)/%.cu $(CNN_SRCDIR)/include/%.h
 ifndef NVCC_INSTALLED
 	@echo "$(NVCC) not found, skipping"
@@ -95,6 +136,9 @@ endif
 $(BUILDDIR)/%.o: $(SRCDIR)/%.c $(SRCDIR)/include/%.h
 	$(CC) -c $< -o $@ $(CFLAGS)
 
+$(BUILDDIR)/%.cuda.o: $(SRCDIR)/%.c $(SRCDIR)/include/%.h
+	$(CC) -c $< -o $@ $(CFLAGS) -DUSE_CUDA -lcuda -I/opt/cuda/include
+
 $(BUILDDIR)/cuda_%.o: $(SRCDIR)/%.cu $(SRCDIR)/include/%.h
 ifndef NVCC_INSTALLED
 	@echo "$(NVCC) not found, skipping"
@@ -116,14 +160,18 @@ prepare-tests:
 	@rm -f $(BUILDDIR)/test-*
 
 
-build/test-cnn_%: test/cnn_%.c $(CNN_OBJ) $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o
+build/test-cnn_%: test/cnn_%.c $(CNN_OBJ) $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o $(BUILDDIR)/utils.o
 	$(CC) $^ -o $@ $(CFLAGS)
 
 # mnist.o is already included as mnist_mnist.o
 build/test-mnist_%: test/mnist_%.c $(MNIST_OBJ) $(BUILDDIR)/colors.o
 	$(CC) $^ -o $@ $(CFLAGS)
 
-$(BUILDDIR)/test-cnn_%: test/cnn_%.cu $(BUILDDIR)/cnn_cuda_%.o $(BUILDDIR)/cuda_utils.o $(BUILDDIR)/colors.o $(BUILDDIR)/mnist.o
+$(BUILDDIR)/test-cnn_%: test/cnn_%.cu \
+	$(BUILDDIR)/cnn_cuda_%.o \
+	$(BUILDDIR)/cuda_utils.o \
+	$(BUILDDIR)/colors.cuda.o \
+	$(BUILDDIR)/mnist.cuda.o
 ifndef NVCC_INSTALLED
 	@echo "$(NVCC) not found, skipping"
 else
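The new *.cuda.o pattern rules compile the very same .c sources a second time with -DUSE_CUDA, so each module can pick its behaviour at compile time while the nvcc step links the GPU binary. A minimal sketch of that dual-build pattern (illustrative only, not a file from the repository):

    /* Sketch: one source file, two objects (.o and .cuda.o). */
    #include <stdio.h>

    const char* build_flavour(void) {
    #ifdef USE_CUDA
        return "CUDA-enabled object (.cuda.o)"; /* built with -DUSE_CUDA */
    #else
        return "plain object (.o)";
    #endif
    }

    int main(void) {
        printf("%s\n", build_flavour());
        return 0;
    }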
@@ -3,36 +3,8 @@
 #include <stdbool.h>
 
 #include "include/struct.h"
-#ifdef __CUDACC__
-#include "../include/utils.h"
-#else
-bool check_cuda_compatibility() {
-#ifdef __CUDACC__
-    int nDevices;
-    cudaDeviceProp prop;
-
-    cudaGetDeviceCount(&nDevices);
-    if (nDevices == 0) {
-        printf("Pas d'utilisation du GPU\n\n");
-        return false;
-    }
-
-    printf("GPUs disponibles:\n");
-
-    for (int i=0; i < nDevices; i++) {
-        cudaGetDeviceProperties(&prop, i);
-        printf(" - %s\n", prop.name);
-    }
-
-    cudaGetDeviceProperties(&prop, 0);
-    printf("Utilisation du GPU: " BLUE "%s" RESET " (Compute capability: %d.%d)\n\n", prop.name, prop.major, prop.minor);
-    return true;
-#else
-    printf("Pas d'utilisation du GPU\n\n");
-    return false;
-#endif
-}
-#endif
+#include "../include/utils.h"
 
 #define BLOCKSIZE_x 16
 #define BLOCKSIZE_y 8
@@ -64,107 +36,40 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
 
 #ifdef __CUDACC__
 
-__global__ void make_convolution_kernel(int k_size, int columns, int rows, float*** bias, size_t pitch_bias, float**** w, size_t pitch_w, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_dim) {
+__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
     // Respective equivalents of i, j and k in the loop executed by the CPU
     int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
     int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
     int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_dim, k_size)
 
-    int input_dim = output_dim+k_size - 1;
-
-    if (idx >= columns || idy >= output_dim || idz >= output_dim) {
+    if (idx >= kernel->columns || idy >= output_dim || idz >= output_dim) {
         return;
     }
 
-    float* bias_offset;
-    float* w_offset;
-    float* input_offset;
-    float* output_offset;
-
-    bias_offset = (float*)((char*)bias + (idx*output_dim+idy)*pitch_bias);
-    float f = bias_offset[idz];
+    float f = kernel->bias[idx][idy][idz];
 
-    for (int a=0; a < rows; a++) {
-        for (int b=0; b < k_size; b++) {
-            for (int c=0; c < k_size; c++) {
-                w_offset = (float*)((char*)w + ((a*columns + idx)*k_size+b)*pitch_w);
-                input_offset = (float*)((char*)input + (a*input_dim + idy+b)*pitch_input);
-                f += w_offset[c]*input_offset[idz+c];
+    for (int a=0; a < kernel->rows; a++) {
+        for (int b=0; b < kernel->k_size; b++) {
+            for (int c=0; c < kernel->k_size; c++) {
+                f += kernel->w[a][idx][b][c]*input[a][idy+b][idz+c];
             }
         }
     }
 
-    output_offset = (float*)((char*)output + (idx*output_dim+idy)*pitch_output);
-    output_offset[idz] = f;
+    output[idx][idy][idz] = f;
 }
 
 void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
-    // Copy arrays
-    size_t pitch_input;
-    size_t pitch_output;
-    size_t pitch_bias;
-    size_t pitch_weight;
-    float*** input_dev;
-    float*** output_dev;
-    float*** kernel_bias;
-    float**** kernel_weight;
-
-    int input_dim = output_dim+kernel->k_size - 1;
-
-    // Copy ***input
-    gpuErrchk( cudaMallocPitch((void**)&input_dev, &pitch_input, input_dim*sizeof(float), kernel->rows*input_dim));
-    for (int i=0; i < kernel->rows; i++) {
-        for (int j=0; j < input_dim; j++) {
-            gpuErrchk( cudaMemcpy((void*)((char*)input_dev + (i*input_dim+j)*pitch_input), (const void*)&(input[i][j][0]), input_dim*sizeof(float), cudaMemcpyHostToDevice));
-        }
-    }
-    // cudaMalloc ***output
-    gpuErrchk( cudaMallocPitch((void**)&output_dev, &pitch_output, output_dim*sizeof(float), kernel->columns*output_dim));
-
-    // Copy ***Kernel bias
-    gpuErrchk( cudaMallocPitch((void**)&kernel_bias, &pitch_bias, output_dim*sizeof(float), kernel->columns*output_dim));
-    for (int i=0; i < kernel->columns; i++) {
-        for (int j=0; j < output_dim; j++) {
-            gpuErrchk( cudaMemcpy((void*)((char*)kernel_bias + (i*output_dim+j)*pitch_bias), (const void*)&(kernel->bias[i][j][0]), output_dim*sizeof(float), cudaMemcpyHostToDevice));
-        }
-    }
-
-    // Copy ****Kernel weights
-    gpuErrchk( cudaMallocPitch((void**)&kernel_weight, &pitch_weight, kernel->k_size*sizeof(float), (kernel->rows*kernel->columns*kernel->k_size)));
-    for (int i=0; i < kernel->rows; i++) {
-        for (int j=0; j < kernel->columns; j++) {
-            for (int k=0; k < kernel->k_size; k++) {
-                gpuErrchk( cudaMemcpy((void*)((char*)kernel_weight + ((i*kernel->columns+j)*kernel->k_size+k)*pitch_weight), (const void*)&(kernel->w[i][j][k][0]), kernel->k_size*sizeof(float), cudaMemcpyHostToDevice));
-            }
-        }
-    }
-
     // Make computation
     dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    make_convolution_kernel<<<gridSize, blockSize>>>(kernel->k_size, kernel->columns, kernel->rows, kernel_bias, pitch_bias, kernel_weight, pitch_weight, input_dev, pitch_input, output_dev, pitch_output, output_dim);
-    gpuErrchk( cudaPeekAtLastError() );
-    gpuErrchk( cudaDeviceSynchronize() );
-
-    // Copy output back
-    for (int i=0; i < kernel->columns; i++) {
-        for (int j=0; j < output_dim; j++) {
-            gpuErrchk( cudaMemcpy((void*)&(output[i][j][0]), (const void*)((char*)output_dev + (i*output_dim+j)*pitch_output), output_dim*sizeof(float), cudaMemcpyDeviceToHost));
-        }
-    }
-
-    // Free all the allocated memory
-    gpuErrchk( cudaFree(input_dev) );
-    gpuErrchk( cudaFree(output_dev) );
-    gpuErrchk( cudaFree(kernel_bias) );
-    gpuErrchk( cudaFree(kernel_weight) );
+    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim);
+    gpuErrchk( cudaPeekAtLastError() );
+    gpuErrchk( cudaDeviceSynchronize() );
 }
 #endif
 
 
 void make_convolution(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
 #ifndef __CUDACC__
     make_convolution_cpu(kernel, input, output, output_dim);
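The rewrite above removes the whole staging dance: the old make_convolution_device copied input, bias and w into pitched device buffers, launched, copied the output back, and freed everything; the new one hands the host-side Kernel_cnn* and float*** arrays straight to the __global__ kernel. That is only legal if every level of those pointer trees is visible to the GPU, which is what the custom allocator has to guarantee. A self-contained demonstration of the requirement, using cudaMallocManaged (a sketch independent of the project's structs):

    /* Sketch: a host-built two-level pointer structure dereferenced on the GPU.
       This only works because every level comes from cudaMallocManaged. */
    #include <cstdio>

    __global__ void add_one(float** rows, int n, int m) {
        int i = blockIdx.x;
        int j = threadIdx.x;
        if (i < n && j < m) {
            rows[i][j] += 1.0f; /* direct dereference of the host-built pointers */
        }
    }

    int main() {
        const int n = 4, m = 8;
        float** rows;
        cudaMallocManaged(&rows, n * sizeof(float*), cudaMemAttachGlobal);
        for (int i = 0; i < n; i++) {
            cudaMallocManaged(&rows[i], m * sizeof(float), cudaMemAttachGlobal);
            for (int j = 0; j < m; j++) {
                rows[i][j] = 0.0f;
            }
        }
        add_one<<<n, m>>>(rows, n, m);
        cudaDeviceSynchronize();
        printf("rows[2][3] = %f\n", rows[2][3]); /* prints 1.000000 */
        for (int i = 0; i < n; i++) {
            cudaFree(rows[i]);
        }
        cudaFree(rows);
        return 0;
    }

The trade-off is simplicity versus control: pitched copies gave aligned, coalesced rows, while managed pointer-chasing is far easier to keep correct across the CPU and GPU code paths.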
@@ -3,36 +3,8 @@
 #include <stdbool.h>
 
 #include "include/struct.h"
-#ifdef __CUDACC__
-#include "../include/utils.h"
-#else
-bool check_cuda_compatibility() {
-#ifdef __CUDACC__
-    int nDevices;
-    cudaDeviceProp prop;
-
-    cudaGetDeviceCount(&nDevices);
-    if (nDevices == 0) {
-        printf("Pas d'utilisation du GPU\n\n");
-        return false;
-    }
-
-    printf("GPUs disponibles:\n");
-
-    for (int i=0; i < nDevices; i++) {
-        cudaGetDeviceProperties(&prop, i);
-        printf(" - %s\n", prop.name);
-    }
-
-    cudaGetDeviceProperties(&prop, 0);
-    printf("Utilisation du GPU: " BLUE "%s" RESET " (Compute capability: %d.%d)\n\n", prop.name, prop.major, prop.minor);
-    return true;
-#else
-    printf("Pas d'utilisation du GPU\n\n");
-    return false;
-#endif
-}
-#endif
+#include "../include/utils.h"
 
 #define BLOCKSIZE_x 16
 #define BLOCKSIZE_y 8
@@ -64,101 +36,35 @@ void make_convolution_cpu(Kernel_cnn* kernel, float*** input, float*** output, i
 
 #ifdef __CUDACC__
 
-__global__ void make_convolution_kernel(int k_size, int columns, int rows, float*** bias, size_t pitch_bias, float**** w, size_t pitch_w, float*** input, size_t pitch_input, float*** output, size_t pitch_output, int output_dim) {
+__global__ void make_convolution_kernel(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
     // Respective equivalents of i, j and k in the loop executed by the CPU
     int idx = threadIdx.x + blockDim.x*blockIdx.x; // < kernel->columns
     int idy = threadIdx.y + blockDim.y*blockIdx.y; // < min(output_dim, k_size)
     int idz = threadIdx.z + blockDim.z*blockIdx.z; // < min(output_dim, k_size)
 
-    int input_dim = output_dim+k_size - 1;
-
-    if (idx >= columns || idy >= output_dim || idz >= output_dim) {
+    if (idx >= kernel->columns || idy >= output_dim || idz >= output_dim) {
        return;
     }
 
-    float* bias_offset;
-    float* w_offset;
-    float* input_offset;
-    float* output_offset;
-
-    bias_offset = (float*)((char*)bias + (idx*output_dim+idy)*pitch_bias);
-    float f = bias_offset[idz];
+    float f = kernel->bias[idx][idy][idz];
 
-    for (int a=0; a < rows; a++) {
-        for (int b=0; b < k_size; b++) {
-            for (int c=0; c < k_size; c++) {
-                w_offset = (float*)((char*)w + ((a*columns + idx)*k_size+b)*pitch_w);
-                input_offset = (float*)((char*)input + (a*input_dim + idy+b)*pitch_input);
-                f += w_offset[c]*input_offset[idz+c];
+    for (int a=0; a < kernel->rows; a++) {
+        for (int b=0; b < kernel->k_size; b++) {
+            for (int c=0; c < kernel->k_size; c++) {
+                f += kernel->w[a][idx][b][c]*input[a][idy+b][idz+c];
             }
         }
     }
 
-    output_offset = (float*)((char*)output + (idx*output_dim+idy)*pitch_output);
-    output_offset[idz] = f;
+    output[idx][idy][idz] = f;
 }
 
 void make_convolution_device(Kernel_cnn* kernel, float*** input, float*** output, int output_dim) {
-    // Copy arrays
-    size_t pitch_input;
-    size_t pitch_output;
-    size_t pitch_bias;
-    size_t pitch_weight;
-    float*** input_dev;
-    float*** output_dev;
-    float*** kernel_bias;
-    float**** kernel_weight;
-
-    int input_dim = output_dim+kernel->k_size - 1;
-
-    // Copy ***input
-    gpuErrchk( cudaMallocPitch((void**)&input_dev, &pitch_input, input_dim*sizeof(float), kernel->rows*input_dim));
-    for (int i=0; i < kernel->rows; i++) {
-        for (int j=0; j < input_dim; j++) {
-            gpuErrchk( cudaMemcpy((void*)((char*)input_dev + (i*input_dim+j)*pitch_input), (const void*)&(input[i][j][0]), input_dim*sizeof(float), cudaMemcpyHostToDevice));
-        }
-    }
-    // cudaMalloc ***output
-    gpuErrchk( cudaMallocPitch((void**)&output_dev, &pitch_output, output_dim*sizeof(float), kernel->columns*output_dim));
-
-    // Copy ***Kernel bias
-    gpuErrchk( cudaMallocPitch((void**)&kernel_bias, &pitch_bias, output_dim*sizeof(float), kernel->columns*output_dim));
-    for (int i=0; i < kernel->columns; i++) {
-        for (int j=0; j < output_dim; j++) {
-            gpuErrchk( cudaMemcpy((void*)((char*)kernel_bias + (i*output_dim+j)*pitch_bias), (const void*)&(kernel->bias[i][j][0]), output_dim*sizeof(float), cudaMemcpyHostToDevice));
-        }
-    }
-
-    // Copy ****Kernel weights
-    gpuErrchk( cudaMallocPitch((void**)&kernel_weight, &pitch_weight, kernel->k_size*sizeof(float), (kernel->rows*kernel->columns*kernel->k_size)));
-    for (int i=0; i < kernel->rows; i++) {
-        for (int j=0; j < kernel->columns; j++) {
-            for (int k=0; k < kernel->k_size; k++) {
-                gpuErrchk( cudaMemcpy((void*)((char*)kernel_weight + ((i*kernel->columns+j)*kernel->k_size+k)*pitch_weight), (const void*)&(kernel->w[i][j][k][0]), kernel->k_size*sizeof(float), cudaMemcpyHostToDevice));
-            }
-        }
-    }
-
     // Make computation
     dim3 gridSize(i_div_up(kernel->columns, BLOCKSIZE_x), i_div_up(output_dim, BLOCKSIZE_y), i_div_up(output_dim, BLOCKSIZE_z));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);
 
-    make_convolution_kernel<<<gridSize, blockSize>>>(kernel->k_size, kernel->columns, kernel->rows, kernel_bias, pitch_bias, kernel_weight, pitch_weight, input_dev, pitch_input, output_dev, pitch_output, output_dim);
-    gpuErrchk( cudaPeekAtLastError() );
-    gpuErrchk( cudaDeviceSynchronize() );
-
-    // Copy output back
-    for (int i=0; i < kernel->columns; i++) {
-        for (int j=0; j < output_dim; j++) {
-            gpuErrchk( cudaMemcpy((void*)&(output[i][j][0]), (const void*)((char*)output_dev + (i*output_dim+j)*pitch_output), output_dim*sizeof(float), cudaMemcpyDeviceToHost));
-        }
-    }
-
-    // Free all the allocated memory
-    gpuErrchk( cudaFree(input_dev) );
-    gpuErrchk( cudaFree(output_dev) );
-    gpuErrchk( cudaFree(kernel_bias) );
-    gpuErrchk( cudaFree(kernel_weight) );
+    make_convolution_kernel<<<gridSize, blockSize>>>(kernel, input, output, output_dim);
+    gpuErrchk( cudaPeekAtLastError() );
+    gpuErrchk( cudaDeviceSynchronize() );
 }
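Both convolution files, and the matrix multiplication further below, lean on two helpers that now live in the shared utils header: gpuErrchk around every CUDA runtime call, and i_div_up to size the launch grid. Their definitions are not in the captured hunks; the conventional forms consistent with the call sites are (a sketch, the repository's utils.h may differ):

    /* Conventional helper definitions matching the call sites above (sketch). */
    #include <cstdio>
    #include <cstdlib>

    #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
    inline void gpuAssert(cudaError_t code, const char* file, int line) {
        if (code != cudaSuccess) {
            fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
            exit(code);
        }
    }

    /* Ceiling division: number of blocks needed to cover a items in groups of b. */
    inline int i_div_up(int a, int b) {
        return (a + b - 1) / b;
    }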
@@ -3,6 +3,7 @@
 
 #include "include/initialisation.h"
 #include "include/function.h"
+#include "../include/utils.h"
 
 #include "include/creation.h"
 
@@ -10,19 +11,19 @@ Network* create_network(int max_size, float learning_rate, int dropout, int init
     if (dropout < 0 || dropout > 100) {
         printf("Erreur, la probabilité de dropout n'est pas respecté, elle doit être comprise entre 0 et 100\n");
     }
-    Network* network = (Network*)malloc(sizeof(Network));
+    Network* network = (Network*)nalloc(sizeof(Network));
     network->learning_rate = learning_rate;
     network->max_size = max_size;
     network->dropout = dropout;
     network->initialisation = initialisation;
     network->size = 1;
-    network->input = (float****)malloc(sizeof(float***)*max_size);
-    network->input_z = (float****)malloc(sizeof(float***)*max_size);
-    network->kernel = (Kernel**)malloc(sizeof(Kernel*)*(max_size-1));
-    network->width = (int*)malloc(sizeof(int*)*max_size);
-    network->depth = (int*)malloc(sizeof(int*)*max_size);
+    network->input = (float****)nalloc(sizeof(float***)*max_size);
+    network->input_z = (float****)nalloc(sizeof(float***)*max_size);
+    network->kernel = (Kernel**)nalloc(sizeof(Kernel*)*(max_size-1));
+    network->width = (int*)nalloc(sizeof(int*)*max_size);
+    network->depth = (int*)nalloc(sizeof(int*)*max_size);
     for (int i=0; i < max_size-1; i++) {
-        network->kernel[i] = (Kernel*)malloc(sizeof(Kernel));
+        network->kernel[i] = (Kernel*)nalloc(sizeof(Kernel));
     }
     network->width[0] = input_dim;
     network->depth[0] = input_depth;
@@ -57,11 +58,11 @@ Network* create_simple_one(float learning_rate, int dropout, int activation, int
 }
 
 void create_a_cube_input_layer(Network* network, int pos, int depth, int dim) {
-    network->input[pos] = (float***)malloc(sizeof(float**)*depth);
+    network->input[pos] = (float***)nalloc(sizeof(float**)*depth);
     for (int i=0; i < depth; i++) {
-        network->input[pos][i] = (float**)malloc(sizeof(float*)*dim);
+        network->input[pos][i] = (float**)nalloc(sizeof(float*)*dim);
         for (int j=0; j < dim; j++) {
-            network->input[pos][i][j] = (float*)malloc(sizeof(float)*dim);
+            network->input[pos][i][j] = (float*)nalloc(sizeof(float)*dim);
         }
     }
     network->width[pos] = dim;
@@ -69,11 +70,11 @@ void create_a_cube_input_layer(Network* network, int pos, int depth, int dim) {
 }
 
 void create_a_cube_input_z_layer(Network* network, int pos, int depth, int dim) {
-    network->input_z[pos] = (float***)malloc(sizeof(float**)*depth);
+    network->input_z[pos] = (float***)nalloc(sizeof(float**)*depth);
     for (int i=0; i < depth; i++) {
-        network->input_z[pos][i] = (float**)malloc(sizeof(float*)*dim);
+        network->input_z[pos][i] = (float**)nalloc(sizeof(float*)*dim);
         for (int j=0; j < dim; j++) {
-            network->input_z[pos][i][j] = (float*)malloc(sizeof(float)*dim);
+            network->input_z[pos][i][j] = (float*)nalloc(sizeof(float)*dim);
         }
     }
     network->width[pos] = dim;
@@ -81,17 +82,17 @@ void create_a_cube_input_z_layer(Network* network, int pos, int depth, int dim)
 }
 
 void create_a_line_input_layer(Network* network, int pos, int dim) {
-    network->input[pos] = (float***)malloc(sizeof(float**));
-    network->input[pos][0] = (float**)malloc(sizeof(float*));
-    network->input[pos][0][0] = (float*)malloc(sizeof(float)*dim);
+    network->input[pos] = (float***)nalloc(sizeof(float**));
+    network->input[pos][0] = (float**)nalloc(sizeof(float*));
+    network->input[pos][0][0] = (float*)nalloc(sizeof(float)*dim);
     network->width[pos] = dim;
     network->depth[pos] = 1;
 }
 
 void create_a_line_input_z_layer(Network* network, int pos, int dim) {
-    network->input_z[pos] = (float***)malloc(sizeof(float**));
-    network->input_z[pos][0] = (float**)malloc(sizeof(float*));
-    network->input_z[pos][0][0] = (float*)malloc(sizeof(float)*dim);
+    network->input_z[pos] = (float***)nalloc(sizeof(float**));
+    network->input_z[pos][0] = (float**)nalloc(sizeof(float*));
+    network->input_z[pos][0][0] = (float*)nalloc(sizeof(float)*dim);
     network->width[pos] = dim;
     network->depth[pos] = 1;
 }
@@ -132,34 +133,40 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act
     network->kernel[k_pos]->nn = NULL;
     network->kernel[k_pos]->activation = activation;
     network->kernel[k_pos]->linearisation = 0;
-    network->kernel[k_pos]->cnn = (Kernel_cnn*)malloc(sizeof(Kernel_cnn));
+    network->kernel[k_pos]->cnn = (Kernel_cnn*)nalloc(sizeof(Kernel_cnn));
     Kernel_cnn* cnn = network->kernel[k_pos]->cnn;
 
     cnn->k_size = kernel_size;
     cnn->rows = depth_input;
     cnn->columns = depth_output;
-    cnn->w = (float****)malloc(sizeof(float***)*depth_input);
-    cnn->d_w = (float****)malloc(sizeof(float***)*depth_input);
+    cnn->w = (float****)nalloc(sizeof(float***)*depth_input);
+    cnn->d_w = (float****)nalloc(sizeof(float***)*depth_input);
     for (int i=0; i < depth_input; i++) {
-        cnn->w[i] = (float***)malloc(sizeof(float**)*depth_output);
-        cnn->d_w[i] = (float***)malloc(sizeof(float**)*depth_output);
+        cnn->w[i] = (float***)nalloc(sizeof(float**)*depth_output);
+        cnn->d_w[i] = (float***)nalloc(sizeof(float**)*depth_output);
         for (int j=0; j < depth_output; j++) {
-            cnn->w[i][j] = (float**)malloc(sizeof(float*)*kernel_size);
-            cnn->d_w[i][j] = (float**)malloc(sizeof(float*)*kernel_size);
+            cnn->w[i][j] = (float**)nalloc(sizeof(float*)*kernel_size);
+            cnn->d_w[i][j] = (float**)nalloc(sizeof(float*)*kernel_size);
            for (int k=0; k < kernel_size; k++) {
-                cnn->w[i][j][k] = (float*)malloc(sizeof(float)*kernel_size);
-                cnn->d_w[i][j][k] = (float*)calloc(kernel_size, sizeof(float));
+                cnn->w[i][j][k] = (float*)nalloc(sizeof(float)*kernel_size);
+                cnn->d_w[i][j][k] = (float*)nalloc(sizeof(float)*kernel_size);
+                for (int l=0; l < kernel_size; l++) {
+                    cnn->d_w[i][j][k][l] = 0.;
+                }
             }
         }
     }
-    cnn->bias = (float***)malloc(sizeof(float**)*depth_output);
-    cnn->d_bias = (float***)malloc(sizeof(float**)*depth_output);
+    cnn->bias = (float***)nalloc(sizeof(float**)*depth_output);
+    cnn->d_bias = (float***)nalloc(sizeof(float**)*depth_output);
     for (int i=0; i < depth_output; i++) {
-        cnn->bias[i] = (float**)malloc(sizeof(float*)*bias_size);
-        cnn->d_bias[i] = (float**)malloc(sizeof(float*)*bias_size);
+        cnn->bias[i] = (float**)nalloc(sizeof(float*)*bias_size);
+        cnn->d_bias[i] = (float**)nalloc(sizeof(float*)*bias_size);
         for (int j=0; j < bias_size; j++) {
-            cnn->bias[i][j] = (float*)malloc(sizeof(float)*bias_size);
-            cnn->d_bias[i][j] = (float*)calloc(bias_size, sizeof(float));
+            cnn->bias[i][j] = (float*)nalloc(sizeof(float)*bias_size);
+            cnn->d_bias[i][j] = (float*)nalloc(sizeof(float)*bias_size);
+            for (int k=0; k < bias_size; k++) {
+                cnn->d_bias[i][j][k] = 0.;
+            }
         }
     }
     int n_in = network->width[n-1]*network->width[n-1]*network->depth[n-1];
@@ -180,20 +187,29 @@ void add_dense(Network* network, int output_units, int activation) {
         return;
     }
     network->kernel[k_pos]->cnn = NULL;
-    network->kernel[k_pos]->nn = (Kernel_nn*)malloc(sizeof(Kernel_nn));
+    network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(sizeof(Kernel_nn));
     Kernel_nn* nn = network->kernel[k_pos]->nn;
     network->kernel[k_pos]->activation = activation;
     network->kernel[k_pos]->linearisation = 0;
 
     nn->input_units = input_units;
     nn->output_units = output_units;
-    nn->bias = (float*)malloc(sizeof(float)*output_units);
-    nn->d_bias = (float*)calloc(output_units, sizeof(float));
-    nn->weights = (float**)malloc(sizeof(float*)*input_units);
-    nn->d_weights = (float**)malloc(sizeof(float*)*input_units);
-    for (int i=0; i < input_units; i++) {
-        nn->weights[i] = (float*)malloc(sizeof(float)*output_units);
-        nn->d_weights[i] = (float*)calloc(output_units, sizeof(float));
+    nn->bias = (float*)nalloc(sizeof(float)*output_units);
+    nn->d_bias = (float*)nalloc(sizeof(float)*output_units);
+    for (int i=0; i < output_units; i++) {
+        nn->d_bias[i] = 0.;
+    }
+
+    nn->weights = (float**)nalloc(sizeof(float*)*input_units);
+    nn->d_weights = (float**)nalloc(sizeof(float*)*input_units);
+    for (int i=0; i < input_units; i++) {
+        nn->weights[i] = (float*)nalloc(sizeof(float)*output_units);
+        nn->d_weights[i] = (float*)nalloc(sizeof(float)*output_units);
+        for (int j=0; j < output_units; j++) {
+            nn->d_weights[i][j] = 0.;
+        }
     }
 
     initialisation_1d_matrix(network->initialisation, nn->bias, output_units, input_units, output_units);
     initialisation_2d_matrix(network->initialisation, nn->weights, input_units, output_units, input_units, output_units);
     create_a_line_input_layer(network, n, output_units);
@@ -212,20 +228,26 @@ void add_dense_linearisation(Network* network, int output_units, int activation)
         return;
     }
     network->kernel[k_pos]->cnn = NULL;
-    network->kernel[k_pos]->nn = (Kernel_nn*)malloc(sizeof(Kernel_nn));
+    network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(sizeof(Kernel_nn));
     Kernel_nn* nn = network->kernel[k_pos]->nn;
     network->kernel[k_pos]->activation = activation;
     network->kernel[k_pos]->linearisation = 1;
     nn->input_units = input_units;
     nn->output_units = output_units;
 
-    nn->bias = (float*)malloc(sizeof(float)*output_units);
-    nn->d_bias = (float*)calloc(output_units, sizeof(float));
-    nn->weights = (float**)malloc(sizeof(float*)*input_units);
-    nn->d_weights = (float**)malloc(sizeof(float*)*input_units);
+    nn->bias = (float*)nalloc(sizeof(float)*output_units);
+    nn->d_bias = (float*)nalloc(sizeof(float)*output_units);
+    for (int i=0; i < output_units; i++) {
+        nn->d_bias[i] = 0.;
+    }
+    nn->weights = (float**)nalloc(sizeof(float*)*input_units);
+    nn->d_weights = (float**)nalloc(sizeof(float*)*input_units);
     for (int i=0; i < input_units; i++) {
-        nn->weights[i] = (float*)malloc(sizeof(float)*output_units);
-        nn->d_weights[i] = (float*)calloc(output_units, sizeof(float));
+        nn->weights[i] = (float*)nalloc(sizeof(float)*output_units);
+        nn->d_weights[i] = (float*)nalloc(sizeof(float)*output_units);
+        for (int j=0; j < output_units; j++) {
+            nn->d_weights[i][j] = 0.;
+        }
     }
     initialisation_1d_matrix(network->initialisation, nn->bias, output_units, input_units, output_units);
     initialisation_2d_matrix(network->initialisation, nn->weights, input_units, output_units, input_units, output_units);
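One detail worth noting in add_convolution, add_dense and add_dense_linearisation: the gradient buffers (d_w, d_bias, d_weights) used to come from calloc, which zero-fills; nalloc apparently has no zeroing variant, so the diff follows every such allocation with an explicit initialisation loop. A hypothetical wrapper could factor that pattern out (not part of the commit):

    /* Hypothetical helper mirroring the added zero-init loops. */
    #include <stddef.h>

    extern void* nalloc(size_t size); /* declared in ../include/utils.h */

    float* nalloc_zeroed(size_t count) {
        float* ptr = (float*)nalloc(count * sizeof(float));
        for (size_t i = 0; i < count; i++) {
            ptr[i] = 0.0f; /* same effect as the d_w / d_bias loops above */
        }
        return ptr;
    }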
@@ -1,28 +1,30 @@
 #include <stdlib.h>
 #include <stdio.h>
 
+#include "../include/utils.h"
+
 #include "include/free.h"
 
 void free_a_cube_input_layer(Network* network, int pos, int depth, int dim) {
     for (int i=0; i < depth; i++) {
         for (int j=0; j < dim; j++) {
-            free(network->input[pos][i][j]);
-            free(network->input_z[pos][i][j]);
+            gree(network->input[pos][i][j]);
+            gree(network->input_z[pos][i][j]);
         }
-        free(network->input[pos][i]);
-        free(network->input_z[pos][i]);
+        gree(network->input[pos][i]);
+        gree(network->input_z[pos][i]);
     }
-    free(network->input[pos]);
-    free(network->input_z[pos]);
+    gree(network->input[pos]);
+    gree(network->input_z[pos]);
 }
 
 void free_a_line_input_layer(Network* network, int pos) {
-    free(network->input[pos][0][0]);
-    free(network->input_z[pos][0][0]);
-    free(network->input[pos][0]);
-    free(network->input_z[pos][0]);
-    free(network->input[pos]);
-    free(network->input_z[pos]);
+    gree(network->input[pos][0][0]);
+    gree(network->input_z[pos][0][0]);
+    gree(network->input[pos][0]);
+    gree(network->input_z[pos][0]);
+    gree(network->input[pos]);
+    gree(network->input_z[pos]);
 }
 
 void free_2d_average_pooling(Network* network, int pos) {
@@ -38,31 +40,31 @@ void free_convolution(Network* network, int pos) {
     free_a_cube_input_layer(network, pos+1, network->depth[pos+1], network->width[pos+1]);
     for (int i=0; i < c; i++) {
         for (int j=0; j < bias_size; j++) {
-            free(k_pos->bias[i][j]);
-            free(k_pos->d_bias[i][j]);
+            gree(k_pos->bias[i][j]);
+            gree(k_pos->d_bias[i][j]);
         }
-        free(k_pos->bias[i]);
-        free(k_pos->d_bias[i]);
+        gree(k_pos->bias[i]);
+        gree(k_pos->d_bias[i]);
     }
-    free(k_pos->bias);
-    free(k_pos->d_bias);
+    gree(k_pos->bias);
+    gree(k_pos->d_bias);
 
     for (int i=0; i < r; i++) {
         for (int j=0; j < c; j++) {
             for (int k=0; k < k_size; k++) {
-                free(k_pos->w[i][j][k]);
-                free(k_pos->d_w[i][j][k]);
+                gree(k_pos->w[i][j][k]);
+                gree(k_pos->d_w[i][j][k]);
             }
-            free(k_pos->w[i][j]);
-            free(k_pos->d_w[i][j]);
+            gree(k_pos->w[i][j]);
+            gree(k_pos->d_w[i][j]);
         }
-        free(k_pos->w[i]);
-        free(k_pos->d_w[i]);
+        gree(k_pos->w[i]);
+        gree(k_pos->d_w[i]);
     }
-    free(k_pos->w);
-    free(k_pos->d_w);
+    gree(k_pos->w);
+    gree(k_pos->d_w);
 
-    free(k_pos);
+    gree(k_pos);
 }
 
 void free_dense(Network* network, int pos) {
@@ -70,16 +72,16 @@ void free_dense(Network* network, int pos) {
     Kernel_nn* k_pos = network->kernel[pos]->nn;
     int dim = k_pos->input_units;
     for (int i=0; i < dim; i++) {
-        free(k_pos->weights[i]);
-        free(k_pos->d_weights[i]);
+        gree(k_pos->weights[i]);
+        gree(k_pos->d_weights[i]);
     }
-    free(k_pos->weights);
-    free(k_pos->d_weights);
+    gree(k_pos->weights);
+    gree(k_pos->d_weights);
 
-    free(k_pos->bias);
-    free(k_pos->d_bias);
+    gree(k_pos->bias);
+    gree(k_pos->d_bias);
 
-    free(k_pos);
+    gree(k_pos);
 }
 
 void free_dense_linearisation(Network* network, int pos) {
@@ -87,29 +89,29 @@ void free_dense_linearisation(Network* network, int pos) {
     Kernel_nn* k_pos = network->kernel[pos]->nn;
     int dim = k_pos->input_units;
     for (int i=0; i < dim; i++) {
-        free(k_pos->weights[i]);
-        free(k_pos->d_weights[i]);
+        gree(k_pos->weights[i]);
+        gree(k_pos->d_weights[i]);
     }
-    free(k_pos->weights);
-    free(k_pos->d_weights);
+    gree(k_pos->weights);
+    gree(k_pos->d_weights);
 
-    free(k_pos->bias);
-    free(k_pos->d_bias);
+    gree(k_pos->bias);
+    gree(k_pos->d_bias);
 
-    free(k_pos);
+    gree(k_pos);
 }
 
 void free_network_creation(Network* network) {
     free_a_cube_input_layer(network, 0, network->depth[0], network->width[0]);
     for (int i=0; i < network->max_size-1; i++)
-        free(network->kernel[i]);
-    free(network->width);
-    free(network->depth);
-    free(network->kernel);
-    free(network->input);
-    free(network->input_z);
+        gree(network->kernel[i]);
+    gree(network->width);
+    gree(network->depth);
+    gree(network->kernel);
+    gree(network->input);
+    gree(network->input_z);
 
-    free(network);
+    gree(network);
 }
 
 void free_network(Network* network) {
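The free.c hunks enforce the pairing rule that comes with a custom allocator: anything obtained through nalloc must be released through gree, never through free(). A minimal illustration (sketch; nalloc/gree as declared in ../include/utils.h):

    #include <stddef.h>

    extern void* nalloc(size_t size);
    extern void gree(void* ptr);

    int main(void) {
        float* buffer = (float*)nalloc(128 * sizeof(float));
        /* ... use buffer on the CPU, and on the GPU in CUDA builds ... */
        gree(buffer); /* correct: matches the allocator */
        /* free(buffer) would be undefined behaviour if nalloc is backed
           by cudaMallocManaged rather than malloc. */
        return 0;
    }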
@@ -6,6 +6,7 @@
 #include <jpeglib.h>
 
 #include "include/jpeg.h"
+#include "../include/utils.h"
 #include "../include/colors.h"
 
 // How to load a JPEG using libjpeg: https://www.tspi.at/2020/03/20/libjpegexample.html
@@ -52,9 +53,9 @@ imgRawImage* loadJpegImageFile(char* lpFilename) {
 #endif
 
     dwBufferBytes = imgWidth * imgHeight * 3; /* We only read RGB, not A */
-    lpData = (unsigned char*)malloc(sizeof(unsigned char)*dwBufferBytes);
+    lpData = (unsigned char*)nalloc(sizeof(unsigned char)*dwBufferBytes);
 
-    lpNewImage = (imgRawImage*)malloc(sizeof(imgRawImage));
+    lpNewImage = (imgRawImage*)nalloc(sizeof(imgRawImage));
     lpNewImage->numComponents = numComponents;
     lpNewImage->width = imgWidth;
     lpNewImage->height = imgHeight;
@@ -74,7 +75,7 @@ imgRawImage* loadJpegImageFile(char* lpFilename) {
 }
 
 jpegDataset* loadJpegDataset(char* folderPath) {
-    jpegDataset* dataset = (jpegDataset*)malloc(sizeof(jpegDataset));
+    jpegDataset* dataset = (jpegDataset*)nalloc(sizeof(jpegDataset));
     imgRawImage* image;
 
     // We start by counting the number of images and categories
@@ -82,8 +83,8 @@ jpegDataset* loadJpegDataset(char* folderPath) {
     dataset->numImages = countFiles(folderPath);
 
     dataset->images = NULL;
-    dataset->labels = (unsigned int*)malloc(sizeof(unsigned int)*dataset->numImages);
-    dataset->fileNames = (char**)malloc(sizeof(char*)*dataset->numImages);
+    dataset->labels = (unsigned int*)nalloc(sizeof(unsigned int)*dataset->numImages);
+    dataset->fileNames = (char**)nalloc(sizeof(char*)*dataset->numImages);
 
     DIR* dirp;
     struct dirent* entry;
@@ -96,17 +97,17 @@ jpegDataset* loadJpegDataset(char* folderPath) {
         if (strcmp(entry->d_name, ".")&&strcmp(entry->d_name, "..")) {
             if (entry->d_type == DT_DIR) {
                 prev_index = index;
-                concatenated_path = malloc(strlen(folderPath)+strlen(entry->d_name)+2);
+                concatenated_path = nalloc(strlen(folderPath)+strlen(entry->d_name)+2);
                 sprintf(concatenated_path, "%s/%s", folderPath, entry->d_name);
                 addFilenamesToArray(concatenated_path, dataset->fileNames, &index);
                 for (int i=prev_index; i < index; i++) {
                     dataset->labels[i] = getLabel(entry->d_name);
                }
-                free(concatenated_path);
+                gree(concatenated_path);
             }
         }
     }
-    dataset->images = (unsigned char**)malloc(sizeof(unsigned char*)*dataset->numImages);
+    dataset->images = (unsigned char**)nalloc(sizeof(unsigned char*)*dataset->numImages);
     for (int i=0; i < (int)dataset->numImages; i++) {
         dataset->images[i] = NULL;
 #ifdef STORE_IMAGES_TO_RAM
@@ -116,7 +117,7 @@ jpegDataset* loadJpegDataset(char* folderPath) {
         }
         image = loadJpegImageFile(dataset->fileNames[i]);
         dataset->images[i] = image->lpData;
-        free(image);
+        gree(image);
 #endif
     }
 #ifdef STORE_IMAGES_TO_RAM
@@ -129,8 +130,8 @@ jpegDataset* loadJpegDataset(char* folderPath) {
     dataset->height = image->height;
     dataset->numComponents = image->numComponents;
 
-    free(image->lpData);
-    free(image);
+    gree(image->lpData);
+    gree(image);
 
     closedir(dirp);
     return dataset;
@@ -184,7 +185,7 @@ void addFilenamesToArray(char* path, char** array, int* index) {
     dirp = opendir(path); /* There should be error handling after this */
     while ((entry = readdir(dirp)) != NULL) {
         if (entry->d_type == DT_REG) { /* If the entry is a regular file */
-            filename = (char*)malloc(strlen(path)+strlen(entry->d_name)+2);
+            filename = (char*)nalloc(strlen(path)+strlen(entry->d_name)+2);
             sprintf(filename, "%s/%s", path, entry->d_name);
             array[i] = filename;
             i++;
@@ -196,15 +197,15 @@ void addFilenamesToArray(char* path, char** array, int* index) {
 
 void free_dataset(jpegDataset* dataset) {
     for (int i=0; i < (int)dataset->numImages; i++) {
-        free(dataset->fileNames[i]);
+        gree(dataset->fileNames[i]);
 #ifdef STORE_IMAGES_TO_RAM
-        free(dataset->images[i]);
+        gree(dataset->images[i]);
 #endif
     }
-    free(dataset->fileNames);
-    free(dataset->labels);
-    free(dataset->images);
-    free(dataset);
+    gree(dataset->fileNames);
+    gree(dataset->labels);
+    gree(dataset->images);
+    gree(dataset);
 }
 
 unsigned int getLabel(char* string) {
@@ -9,7 +9,7 @@
 #define BLOCKSIZE_y 16
 
 #ifdef __CUDACC__
-__global__ void matrix_mul_kernel(float* Md, float* Nd, float* Pd, int n, int p, int q, size_t pitch_m, size_t pitch_n, size_t pitch_p) {
+__global__ void matrix_mul_kernel(float** Md, float** Nd, float** Pd, int n, int p, int q) {
     // Each thread computes every multiplication that uses the element Nd[tx][ty]
     int tx = (blockIdx.x*blockDim.x) + threadIdx.x; // column index
     int ty = (blockIdx.y*blockDim.y) + threadIdx.y; // row index
@@ -18,58 +18,19 @@ __global__ void matrix_mul_kernel(float* Md, float* Nd, float* Pd, int n, int p,
         return;
     }
 
-    // Pvalue stores the Pd element that is computed by the thread
-    float* M_offset;
-    float* P_offset;
-    float* N_offset = (float *)((char*)Nd + tx * pitch_n);
-    float Nxy = N_offset[ty]; // N[tx][ty]
-
     for (int i = 0; i < n; i++) {
-        M_offset = (float *)((char*)Md + i * pitch_m);
-        P_offset = (float*)((char*)Pd + i * pitch_p); // P[i], pitch_p is a byte offset
-        atomicAdd(&P_offset[ty], M_offset[tx] * Nxy); // P[i][ty] += P[i][tx] * N[tx][ty]
+        atomicAdd(&(Pd[i][ty]), Md[i][tx]*Nd[tx][ty]);
+        // P[i][ty] += P[i][tx] * N[tx][ty]
     }
 }
 
 void matrix_multiplication_device(float** m1, float** m2, float** result, int n, int p, int q) {
-    // Prepare the matrices
-    size_t pitch_m1_dev;
-    size_t pitch_m2_dev;
-    size_t pitch_result_dev;
-    float* m1_dev;
-    float* m2_dev;
-    float* result_dev;
-
-    gpuErrchk( cudaMallocPitch((void**)&m1_dev, &pitch_m1_dev, p * sizeof(float), n));
-    for (int i=0; i < n; i++) {
-        gpuErrchk( cudaMemcpy((void*)((char*)m1_dev + i*pitch_m1_dev), (const void*)&(m1[i][0]), p*sizeof(float), cudaMemcpyHostToDevice));
-    }
-
-    gpuErrchk( cudaMallocPitch((void**)&m2_dev, &pitch_m2_dev, q * sizeof(float), p));
-    for (int i=0; i < p; i++) {
-        gpuErrchk( cudaMemcpy((void*)((char*)m2_dev + i*pitch_m2_dev), (const void*)&(m2[i][0]), q*sizeof(float), cudaMemcpyHostToDevice));
-    }
-
-    gpuErrchk( cudaMallocPitch((void**)&result_dev, &pitch_result_dev, q * sizeof(float), n));
-    gpuErrchk( cudaMemset(result_dev, 0, pitch_result_dev*n));
-
     // Computation
     dim3 gridSize(i_div_up(p, BLOCKSIZE_x), i_div_up(q, BLOCKSIZE_y));
     dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y);
 
-    matrix_mul_kernel<<<gridSize, blockSize>>>(m1_dev, m2_dev, result_dev, n, p, q, pitch_m1_dev, pitch_m2_dev, pitch_result_dev);
-    gpuErrchk( cudaPeekAtLastError() );
-    gpuErrchk( cudaDeviceSynchronize() );
-
-    // Post-processing
-    for (int i=0; i < n; i++) {
-        gpuErrchk( cudaMemcpy((void*)&(result[i][0]), (const void*)((char*)result_dev + i*pitch_result_dev), sizeof(float)*q, cudaMemcpyDeviceToHost));
-    }
-
-    gpuErrchk( cudaFree(result_dev) );
-    gpuErrchk( cudaFree(m1_dev) );
-    gpuErrchk( cudaFree(m2_dev) );
+    matrix_mul_kernel<<<gridSize, blockSize>>>(m1, m2, result, n, p, q);
+    gpuErrchk( cudaPeekAtLastError() );
+    gpuErrchk( cudaDeviceSynchronize() );
 }
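After this change, matrix_mul_kernel receives the host-built float** matrices directly; each thread (tx, ty) scatters its product into column ty of every output row with atomicAdd, and summing those contributions over tx reproduces the usual dot product. Note that the kernel only accumulates: the old version zeroed result_dev with cudaMemset, so the caller must now hand in a zero-initialised result buffer. For comparison, a plain CPU reference of what the kernel computes (sketch):

    /* CPU reference: result (n x q) = m1 (n x p) times m2 (p x q).
       The CUDA kernel produces the same sums, accumulated in parallel
       with atomicAdd on top of a zeroed result. */
    void matrix_multiplication_reference(float** m1, float** m2, float** result,
                                         int n, int p, int q) {
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < q; j++) {
                result[i][j] = 0.0f;
                for (int k = 0; k < p; k++) {
                    result[i][j] += m1[i][k] * m2[k][j];
                }
            }
        }
    }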
@@ -4,6 +4,7 @@
 #include <inttypes.h>
 
 #include "../include/colors.h"
+#include "../include/utils.h"
 #include "include/neuron_io.h"
 #include "include/struct.h"
 
@@ -130,7 +131,7 @@ void write_couche(Network* network, int indice_couche, int type_couche, FILE* pt
 
 Network* read_network(char* filename) {
     FILE *ptr;
-    Network* network = (Network*)malloc(sizeof(Network));
+    Network* network = (Network*)nalloc(sizeof(Network));
 
     ptr = fopen(filename, "rb");
 
@@ -156,8 +157,8 @@ Network* read_network(char* filename) {
     network->dropout = dropout;
 
     // Read the input size of the different matrices
-    network->width = (int*)malloc(sizeof(int)*size);
-    network->depth = (int*)malloc(sizeof(int)*size);
+    network->width = (int*)nalloc(sizeof(int)*size);
+    network->depth = (int*)nalloc(sizeof(int)*size);
 
     for (int i=0; i < (int)size; i++) {
         fread(&tmp, sizeof(uint32_t), 1, ptr);
@@ -175,19 +176,19 @@ Network* read_network(char* filename) {
     }
 
     // Read each layer
-    network->kernel = (Kernel**)malloc(sizeof(Kernel*)*(size-1));
+    network->kernel = (Kernel**)nalloc(sizeof(Kernel*)*(size-1));
 
     for (int i=0; i < (int)size-1; i++) {
         network->kernel[i] = read_kernel(type_couche[i], network->width[i+1], ptr);
     }
 
-    network->input = (float****)malloc(sizeof(float***)*size);
+    network->input = (float****)nalloc(sizeof(float***)*size);
     for (int i=0; i < (int)size; i++) { // input[size][couche->depth][couche->dim][couche->dim]
-        network->input[i] = (float***)malloc(sizeof(float**)*network->depth[i]);
+        network->input[i] = (float***)nalloc(sizeof(float**)*network->depth[i]);
         for (int j=0; j < network->depth[i]; j++) {
-            network->input[i][j] = (float**)malloc(sizeof(float*)*network->width[i]);
+            network->input[i][j] = (float**)nalloc(sizeof(float*)*network->width[i]);
             for (int k=0; k < network->width[i]; k++) {
-                network->input[i][j][k] = (float*)malloc(sizeof(float)*network->width[i]);
+                network->input[i][j][k] = (float*)nalloc(sizeof(float)*network->width[i]);
                 for (int l=0; l < network->width[i]; l++) {
                     network->input[i][j][k][l] = 0.;
                 }
@@ -195,13 +196,13 @@ Network* read_network(char* filename) {
         }
     }
 
-    network->input_z = (float****)malloc(sizeof(float***)*size);
+    network->input_z = (float****)nalloc(sizeof(float***)*size);
     for (int i=0; i < (int)size; i++) { // input[size][couche->depth][couche->dim][couche->dim]
-        network->input_z[i] = (float***)malloc(sizeof(float**)*network->depth[i]);
+        network->input_z[i] = (float***)nalloc(sizeof(float**)*network->depth[i]);
         for (int j=0; j < network->depth[i]; j++) {
-            network->input_z[i][j] = (float**)malloc(sizeof(float*)*network->width[i]);
+            network->input_z[i][j] = (float**)nalloc(sizeof(float*)*network->width[i]);
             for (int k=0; k < network->width[i]; k++) {
-                network->input_z[i][j][k] = (float*)malloc(sizeof(float)*network->width[i]);
+                network->input_z[i][j][k] = (float*)nalloc(sizeof(float)*network->width[i]);
                 for (int l=0; l < network->width[i]; l++) {
                     network->input_z[i][j][k][l] = 0.;
                 }
@@ -214,10 +215,10 @@ Network* read_network(char* filename) {
 }
 
 Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) {
-    Kernel* kernel = (Kernel*)malloc(sizeof(Kernel));
+    Kernel* kernel = (Kernel*)nalloc(sizeof(Kernel));
     if (type_couche == 0) { // CNN case
         // Read the "pre-body"
-        kernel->cnn = (Kernel_cnn*)malloc(sizeof(Kernel_cnn));
+        kernel->cnn = (Kernel_cnn*)nalloc(sizeof(Kernel_cnn));
         kernel->nn = NULL;
         uint32_t buffer[5];
         fread(&buffer, sizeof(buffer), 1, ptr);
@@ -232,14 +233,14 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) {
         Kernel_cnn* cnn = kernel->cnn;
         float tmp;
 
-        cnn->bias = (float***)malloc(sizeof(float**)*cnn->columns);
-        cnn->d_bias = (float***)malloc(sizeof(float**)*cnn->columns);
+        cnn->bias = (float***)nalloc(sizeof(float**)*cnn->columns);
+        cnn->d_bias = (float***)nalloc(sizeof(float**)*cnn->columns);
         for (int i=0; i < cnn->columns; i++) {
-            cnn->bias[i] = (float**)malloc(sizeof(float*)*output_dim);
-            cnn->d_bias[i] = (float**)malloc(sizeof(float*)*output_dim);
+            cnn->bias[i] = (float**)nalloc(sizeof(float*)*output_dim);
+            cnn->d_bias[i] = (float**)nalloc(sizeof(float*)*output_dim);
            for (int j=0; j < output_dim; j++) {
-                cnn->bias[i][j] = (float*)malloc(sizeof(float)*output_dim);
-                cnn->d_bias[i][j] = (float*)malloc(sizeof(float)*output_dim);
+                cnn->bias[i][j] = (float*)nalloc(sizeof(float)*output_dim);
+                cnn->d_bias[i][j] = (float*)nalloc(sizeof(float)*output_dim);
                 for (int k=0; k < output_dim; k++) {
                     fread(&tmp, sizeof(tmp), 1, ptr);
                     cnn->bias[i][j][k] = tmp;
@@ -248,17 +249,17 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) {
             }
         }
 
-        cnn->w = (float****)malloc(sizeof(float***)*cnn->rows);
-        cnn->d_w = (float****)malloc(sizeof(float***)*cnn->rows);
+        cnn->w = (float****)nalloc(sizeof(float***)*cnn->rows);
+        cnn->d_w = (float****)nalloc(sizeof(float***)*cnn->rows);
         for (int i=0; i < cnn->rows; i++) {
-            cnn->w[i] = (float***)malloc(sizeof(float**)*cnn->columns);
-            cnn->d_w[i] = (float***)malloc(sizeof(float**)*cnn->columns);
+            cnn->w[i] = (float***)nalloc(sizeof(float**)*cnn->columns);
+            cnn->d_w[i] = (float***)nalloc(sizeof(float**)*cnn->columns);
            for (int j=0; j < cnn->columns; j++) {
-                cnn->w[i][j] = (float**)malloc(sizeof(float*)*cnn->k_size);
-                cnn->d_w[i][j] = (float**)malloc(sizeof(float*)*cnn->k_size);
+                cnn->w[i][j] = (float**)nalloc(sizeof(float*)*cnn->k_size);
+                cnn->d_w[i][j] = (float**)nalloc(sizeof(float*)*cnn->k_size);
                 for (int k=0; k < cnn->k_size; k++) {
-                    cnn->w[i][j][k] = (float*)malloc(sizeof(float)*cnn->k_size);
-                    cnn->d_w[i][j][k] = (float*)malloc(sizeof(float)*cnn->k_size);
+                    cnn->w[i][j][k] = (float*)nalloc(sizeof(float)*cnn->k_size);
+                    cnn->d_w[i][j][k] = (float*)nalloc(sizeof(float)*cnn->k_size);
                     for (int l=0; l < cnn->k_size; l++) {
                         fread(&tmp, sizeof(tmp), 1, ptr);
                         cnn->w[i][j][k][l] = tmp;
@@ -269,7 +270,7 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) {
         }
     } else if (type_couche == 1) { // NN case
         // Read the "pre-body"
-        kernel->nn = (Kernel_nn*)malloc(sizeof(Kernel_nn));
+        kernel->nn = (Kernel_nn*)nalloc(sizeof(Kernel_nn));
         kernel->cnn = NULL;
         uint32_t buffer[4];
         fread(&buffer, sizeof(buffer), 1, ptr);
@@ -283,19 +284,19 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) {
         Kernel_nn* nn = kernel->nn;
         float tmp;
 
-        nn->bias = (float*)malloc(sizeof(float)*nn->output_units);
-        nn->d_bias = (float*)malloc(sizeof(float)*nn->output_units);
+        nn->bias = (float*)nalloc(sizeof(float)*nn->output_units);
+        nn->d_bias = (float*)nalloc(sizeof(float)*nn->output_units);
         for (int i=0; i < nn->output_units; i++) {
             fread(&tmp, sizeof(tmp), 1, ptr);
             nn->bias[i] = tmp;
             nn->d_bias[i] = 0.;
         }
 
-        nn->weights = (float**)malloc(sizeof(float*)*nn->input_units);
-        nn->d_weights = (float**)malloc(sizeof(float*)*nn->input_units);
+        nn->weights = (float**)nalloc(sizeof(float*)*nn->input_units);
+        nn->d_weights = (float**)nalloc(sizeof(float*)*nn->input_units);
         for (int i=0; i < nn->input_units; i++) {
-            nn->weights[i] = (float*)malloc(sizeof(float)*nn->output_units);
-            nn->d_weights[i] = (float*)malloc(sizeof(float)*nn->output_units);
+            nn->weights[i] = (float*)nalloc(sizeof(float)*nn->output_units);
+            nn->d_weights[i] = (float*)nalloc(sizeof(float)*nn->output_units);
             for (int j=0; j < nn->output_units; j++) {
                 fread(&tmp, sizeof(tmp), 1, ptr);
                 nn->weights[i][j] = tmp;
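read_network and read_kernel chain dozens of fread calls whose return values are ignored, so a truncated model file would silently yield a garbage network. A checked-read helper along these lines could wrap them (illustrative; not part of the commit):

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical wrapper: abort loudly instead of reading garbage. */
    static void fread_or_die(void* dest, size_t size, size_t count, FILE* stream) {
        if (fread(dest, size, count, stream) != count) {
            fprintf(stderr, "Unexpected end of file while reading the network\n");
            exit(1);
        }
    }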
@@ -3,6 +3,8 @@
 #include <stdint.h>
 #include <inttypes.h>
 
+#include "../include/utils.h"
+
 #include "include/jpeg.h"
 
 
@@ -36,11 +38,11 @@ void preview_images(char* path, int limit) {
         if (!dataset->images[i]) {
             image = loadJpegImageFile(dataset->fileNames[i]);
             dataset->images[i] = image->lpData;
-            free(image);
+            gree(image);
         }
         print_image(dataset->images[i], dataset->height, dataset->width);
 
-        free(dataset->images[i]);
+        gree(dataset->images[i]);
     }
 }
@@ -5,6 +5,7 @@
 
 #include "../mnist/include/mnist.h"
 #include "include/neuron_io.h"
+#include "../include/utils.h"
 #include "include/struct.h"
 #include "include/jpeg.h"
 #include "include/free.h"
@@ -29,7 +30,7 @@ void test_network_mnist(Network* network, char* images_file, char* labels_file,
 
     width = mnist_parameters[1];
     height = mnist_parameters[2];
-    free(mnist_parameters);
+    gree(mnist_parameters);
 
     // Load image in the first layer of the Network
     for (int i=0; i < nb_elem; i++) {
@@ -46,11 +47,11 @@ void test_network_mnist(Network* network, char* images_file, char* labels_file,
         }
 
         for (int j=0; j < height; j++) {
-            free(images[i][j]);
+            gree(images[i][j]);
         }
-        free(images[i]);
+        gree(images[i]);
     }
-    free(images);
+    gree(images);
     printf("%d Images. Taux de réussite: %.2f%%\n", nb_elem, 100*accuracy/(float)nb_elem);
 }
 
@@ -75,13 +76,13 @@ void test_network_jpg(Network* network, char* data_dir, bool preview_fails) {
             accuracy++;
         }
 
-        free(dataset->images[i]);
+        gree(dataset->images[i]);
     }
 
     printf("%d Images. Taux de réussite: %.2f%%\n", dataset->numImages, 100*accuracy/(float)dataset->numImages);
-    free(dataset->images);
-    free(dataset->labels);
-    free(dataset);
+    gree(dataset->images);
+    gree(dataset->labels);
+    gree(dataset);
 }
 
 
@@ -109,7 +110,7 @@ void recognize_mnist(Network* network, char* input_file, char* out) {
 
     width = mnist_parameters[1];
     height = mnist_parameters[2];
-    free(mnist_parameters);
+    gree(mnist_parameters);
 
     if (! strcmp(out, "json")) {
         printf("{\n");
@@ -147,15 +148,15 @@ void recognize_mnist(Network* network, char* input_file, char* out) {
         }
 
         for (int j=0; j < height; j++) {
-            free(images[i][j]);
+            gree(images[i][j]);
         }
-        free(images[i]);
+        gree(images[i]);
     }
     if (! strcmp(out, "json")) {
         printf("}\n");
     }
 
-    free(images);
+    gree(images);
 }
 
 void recognize_jpg(Network* network, char* input_file, char* out) {
@@ -194,8 +195,8 @@ void recognize_jpg(Network* network, char* input_file, char* out) {
         printf("}\n");
     }
 
-    free(image->lpData);
-    free(image);
+    gree(image->lpData);
+    gree(image);
 }
 
 void recognize(int dataset_type, char* modele, char* input_file, char* out) {
@ -11,6 +11,7 @@
|
||||
#include "include/initialisation.h"
|
||||
#include "include/neuron_io.h"
|
||||
#include "../include/colors.h"
|
||||
#include "../include/utils.h"
|
||||
#include "include/function.h"
|
||||
#include "include/creation.h"
|
||||
#include "include/update.h"
|
||||
@ -70,7 +71,7 @@ void* train_thread(void* parameters) {
|
||||
if (!param->dataset->images[index[i]]) {
|
||||
image = loadJpegImageFile(param->dataset->fileNames[index[i]]);
|
||||
param->dataset->images[index[i]] = image->lpData;
|
||||
free(image);
|
||||
gree(image);
|
||||
}
|
||||
write_image_in_network_260(param->dataset->images[index[i]], height, width, network->input[0]);
|
||||
forward_propagation(network);
|
||||
@ -81,7 +82,7 @@ void* train_thread(void* parameters) {
|
||||
accuracy += 1.;
|
||||
}
|
||||
|
||||
free(param->dataset->images[index[i]]);
|
||||
gree(param->dataset->images[index[i]]);
|
||||
param->dataset->images[index[i]] = NULL;
|
||||
}
|
||||
}
|
||||
@ -123,7 +124,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di
|
||||
// Chargement des images du set de données MNIST
|
||||
int* parameters = read_mnist_images_parameters(images_file);
|
||||
nb_images_total = parameters[0];
|
||||
free(parameters);
|
||||
gree(parameters);
|
||||
|
||||
images = read_mnist_images(images_file);
|
||||
labels = read_mnist_labels(labels_file);
|
||||
@ -191,7 +192,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di
|
||||
// thread dans l'hypothèse ou le multi-threading n'est pas utilisé.
|
||||
// Cela est utile à des fins de débogage notamment,
|
||||
// où l'utilisation de threads rend vite les choses plus compliquées qu'elles ne le sont.
|
||||
TrainParameters* train_params = (TrainParameters*)malloc(sizeof(TrainParameters));
|
||||
TrainParameters* train_params = (TrainParameters*)nalloc(sizeof(TrainParameters));
|
||||
|
||||
train_params->network = network;
|
||||
train_params->dataset_type = dataset_type;
|
||||
@ -283,7 +284,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di
|
||||
}
|
||||
}
|
||||
current_accuracy = accuracy * nb_images_total/((j+1)*BATCHES);
|
||||
printf("\rThreads [%d]\tÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: "YELLOW"%0.2f%%"RESET, nb_threads, i, epochs, BATCHES*(j+1), nb_images_total, current_accuracy*100);
|
||||
printf("\rThreads [%d]\tÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: " YELLOW "%0.2f%%" RESET, nb_threads, i, epochs, BATCHES*(j+1), nb_images_total, current_accuracy*100);
|
||||
fflush(stdout);
|
||||
#else
|
||||
(void)nb_images_total_remaining; // Juste pour enlever un warning
|
||||
@ -315,7 +316,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di
end_time = omp_get_wtime();
elapsed_time = end_time - start_time;
#ifdef USE_MULTITHREADING
printf("\rThreads [%d]\tÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: "GREEN"%0.4f%%"RESET" \tTemps: %0.2f s\n", nb_threads, i, epochs, nb_images_total, nb_images_total, accuracy*100, elapsed_time);
printf("\rThreads [%d]\tÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: " GREEN "%0.4f%%" RESET " \tTemps: %0.2f s\n", nb_threads, i, epochs, nb_images_total, nb_images_total, accuracy*100, elapsed_time);
#else
printf("\rÉpoque [%d/%d]\tImage [%d/%d]\tAccuracy: "GREEN"%0.4f%%"RESET" \tTemps: %0.2f s\n", i, epochs, nb_images_total, nb_images_total, accuracy*100, elapsed_time);
#endif
@ -332,7 +333,7 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di
#ifdef USE_MULTITHREADING
free(tid);
for (int i=0; i < nb_threads; i++) {
free(train_parameters[i]->network);
free_network(train_parameters[i]->network);
}
free(train_parameters);
#else
@ -342,12 +343,12 @@ void train(int dataset_type, char* images_file, char* labels_file, char* data_di
if (dataset_type == 0) {
for (int i=0; i < nb_images_total; i++) {
for (int j=0; j < 28; j++) {
free(images[i][j]);
gree(images[i][j]);
}
free(images[i]);
gree(images[i]);
}
free(images);
free(labels);
gree(images);
gree(labels);
} else {
free_dataset(dataset);
}
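
Note on the free()/gree() pairing above: once nalloc() maps to cudaMallocManaged(), releasing one of its buffers with free() crosses allocators and is undefined behaviour. A minimal illustration (hypothetical, not part of the commit):

/* Every nalloc() must be matched by gree(), never by free(). */
unsigned int* labels = (unsigned int*)nalloc(10 * sizeof(unsigned int));
/* ... use on the CPU, and on the GPU in the CUDA build ... */
gree(labels);   /* correct: same allocator as nalloc()                  */
/* free(labels);  wrong in the CUDA build: the buffer came from
                  cudaMallocManaged(), not malloc()                     */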

@ -4,6 +4,7 @@
#include <string.h>

#include "../include/colors.h"
#include "../include/utils.h"
#include "include/struct.h"

#define copyVar(var) network_cp->var = network->var
@ -93,7 +94,7 @@ bool equals_networks(Network* network1, Network* network2) {


Network* copy_network(Network* network) {
Network* network_cp = (Network*)malloc(sizeof(Network));
Network* network_cp = (Network*)nalloc(sizeof(Network));
// Paramètre du réseau
int size = network->size;
// Paramètres des couches NN
@ -111,17 +112,17 @@ Network* copy_network(Network* network) {
copyVar(max_size);
copyVar(size);

network_cp->width = (int*)malloc(sizeof(int)*size);
network_cp->depth = (int*)malloc(sizeof(int)*size);
network_cp->width = (int*)nalloc(sizeof(int)*size);
network_cp->depth = (int*)nalloc(sizeof(int)*size);

for (int i=0; i < size; i++) {
copyVar(width[i]);
copyVar(depth[i]);
}

network_cp->kernel = (Kernel**)malloc(sizeof(Kernel*)*(size-1));
network_cp->kernel = (Kernel**)nalloc(sizeof(Kernel*)*(size-1));
for (int i=0; i < size-1; i++) {
network_cp->kernel[i] = (Kernel*)malloc(sizeof(Kernel));
network_cp->kernel[i] = (Kernel*)nalloc(sizeof(Kernel));
if (!network->kernel[i]->nn && !network->kernel[i]->cnn) { // Cas de la couche de linéarisation
copyVar(kernel[i]->activation);
copyVar(kernel[i]->linearisation); // 1
@ -136,23 +137,23 @@ Network* copy_network(Network* network) {
output_units = network->kernel[i]->nn->output_units;

network_cp->kernel[i]->cnn = NULL;
network_cp->kernel[i]->nn = (Kernel_nn*)malloc(sizeof(Kernel_nn));
network_cp->kernel[i]->nn = (Kernel_nn*)nalloc(sizeof(Kernel_nn));

copyVar(kernel[i]->nn->input_units);
copyVar(kernel[i]->nn->output_units);

network_cp->kernel[i]->nn->bias = (float*)malloc(sizeof(float)*output_units);
network_cp->kernel[i]->nn->d_bias = (float*)malloc(sizeof(float)*output_units);
network_cp->kernel[i]->nn->bias = (float*)nalloc(sizeof(float)*output_units);
network_cp->kernel[i]->nn->d_bias = (float*)nalloc(sizeof(float)*output_units);
for (int j=0; j < output_units; j++) {
copyVar(kernel[i]->nn->bias[j]);
network_cp->kernel[i]->nn->d_bias[j] = 0.;
}

network_cp->kernel[i]->nn->weights = (float**)malloc(sizeof(float*)*input_units);
network_cp->kernel[i]->nn->d_weights = (float**)malloc(sizeof(float*)*input_units);
network_cp->kernel[i]->nn->weights = (float**)nalloc(sizeof(float*)*input_units);
network_cp->kernel[i]->nn->d_weights = (float**)nalloc(sizeof(float*)*input_units);
for (int j=0; j < input_units; j++) {
network_cp->kernel[i]->nn->weights[j] = (float*)malloc(sizeof(float)*output_units);
network_cp->kernel[i]->nn->d_weights[j] = (float*)malloc(sizeof(float)*output_units);
network_cp->kernel[i]->nn->weights[j] = (float*)nalloc(sizeof(float)*output_units);
network_cp->kernel[i]->nn->d_weights[j] = (float*)nalloc(sizeof(float)*output_units);
for (int k=0; k < output_units; k++) {
copyVar(kernel[i]->nn->weights[j][k]);
network_cp->kernel[i]->nn->d_weights[j][k] = 0.;
@ -170,20 +171,20 @@ Network* copy_network(Network* network) {


network_cp->kernel[i]->nn = NULL;
network_cp->kernel[i]->cnn = (Kernel_cnn*)malloc(sizeof(Kernel_cnn));
network_cp->kernel[i]->cnn = (Kernel_cnn*)nalloc(sizeof(Kernel_cnn));

copyVar(kernel[i]->cnn->rows);
copyVar(kernel[i]->cnn->k_size);
copyVar(kernel[i]->cnn->columns);

network_cp->kernel[i]->cnn->bias = (float***)malloc(sizeof(float**)*columns);
network_cp->kernel[i]->cnn->d_bias = (float***)malloc(sizeof(float**)*columns);
network_cp->kernel[i]->cnn->bias = (float***)nalloc(sizeof(float**)*columns);
network_cp->kernel[i]->cnn->d_bias = (float***)nalloc(sizeof(float**)*columns);
for (int j=0; j < columns; j++) {
network_cp->kernel[i]->cnn->bias[j] = (float**)malloc(sizeof(float*)*output_dim);
network_cp->kernel[i]->cnn->d_bias[j] = (float**)malloc(sizeof(float*)*output_dim);
network_cp->kernel[i]->cnn->bias[j] = (float**)nalloc(sizeof(float*)*output_dim);
network_cp->kernel[i]->cnn->d_bias[j] = (float**)nalloc(sizeof(float*)*output_dim);
for (int k=0; k < output_dim; k++) {
network_cp->kernel[i]->cnn->bias[j][k] = (float*)malloc(sizeof(float)*output_dim);
network_cp->kernel[i]->cnn->d_bias[j][k] = (float*)malloc(sizeof(float)*output_dim);
network_cp->kernel[i]->cnn->bias[j][k] = (float*)nalloc(sizeof(float)*output_dim);
network_cp->kernel[i]->cnn->d_bias[j][k] = (float*)nalloc(sizeof(float)*output_dim);
for (int l=0; l < output_dim; l++) {
copyVar(kernel[i]->cnn->bias[j][k][l]);
network_cp->kernel[i]->cnn->d_bias[j][k][l] = 0.;
@ -191,17 +192,17 @@ Network* copy_network(Network* network) {
}
}

network_cp->kernel[i]->cnn->w = (float****)malloc(sizeof(float***)*rows);
network_cp->kernel[i]->cnn->d_w = (float****)malloc(sizeof(float***)*rows);
network_cp->kernel[i]->cnn->w = (float****)nalloc(sizeof(float***)*rows);
network_cp->kernel[i]->cnn->d_w = (float****)nalloc(sizeof(float***)*rows);
for (int j=0; j < rows; j++) {
network_cp->kernel[i]->cnn->w[j] = (float***)malloc(sizeof(float**)*columns);
network_cp->kernel[i]->cnn->d_w[j] = (float***)malloc(sizeof(float**)*columns);
network_cp->kernel[i]->cnn->w[j] = (float***)nalloc(sizeof(float**)*columns);
network_cp->kernel[i]->cnn->d_w[j] = (float***)nalloc(sizeof(float**)*columns);
for (int k=0; k < columns; k++) {
network_cp->kernel[i]->cnn->w[j][k] = (float**)malloc(sizeof(float*)*k_size);
network_cp->kernel[i]->cnn->d_w[j][k] = (float**)malloc(sizeof(float*)*k_size);
network_cp->kernel[i]->cnn->w[j][k] = (float**)nalloc(sizeof(float*)*k_size);
network_cp->kernel[i]->cnn->d_w[j][k] = (float**)nalloc(sizeof(float*)*k_size);
for (int l=0; l < k_size; l++) {
network_cp->kernel[i]->cnn->w[j][k][l] = (float*)malloc(sizeof(float)*k_size);
network_cp->kernel[i]->cnn->d_w[j][k][l] = (float*)malloc(sizeof(float)*k_size);
network_cp->kernel[i]->cnn->w[j][k][l] = (float*)nalloc(sizeof(float)*k_size);
network_cp->kernel[i]->cnn->d_w[j][k][l] = (float*)nalloc(sizeof(float)*k_size);
for (int m=0; m < k_size; m++) {
copyVar(kernel[i]->cnn->w[j][k][l][m]);
network_cp->kernel[i]->cnn->d_w[j][k][l][m] = 0.;
@ -212,13 +213,13 @@ Network* copy_network(Network* network) {
}
}

network_cp->input = (float****)malloc(sizeof(float***)*size);
network_cp->input = (float****)nalloc(sizeof(float***)*size);
for (int i=0; i < size; i++) { // input[size][couche->depth][couche->dim][couche->dim]
network_cp->input[i] = (float***)malloc(sizeof(float**)*network->depth[i]);
network_cp->input[i] = (float***)nalloc(sizeof(float**)*network->depth[i]);
for (int j=0; j < network->depth[i]; j++) {
network_cp->input[i][j] = (float**)malloc(sizeof(float*)*network->width[i]);
network_cp->input[i][j] = (float**)nalloc(sizeof(float*)*network->width[i]);
for (int k=0; k < network->width[i]; k++) {
network_cp->input[i][j][k] = (float*)malloc(sizeof(float)*network->width[i]);
network_cp->input[i][j][k] = (float*)nalloc(sizeof(float)*network->width[i]);
for (int l=0; l < network->width[i]; l++) {
network_cp->input[i][j][k][l] = 0.;
}
@ -226,13 +227,13 @@ Network* copy_network(Network* network) {
}
}

network_cp->input_z = (float****)malloc(sizeof(float***)*size);
network_cp->input_z = (float****)nalloc(sizeof(float***)*size);
for (int i=0; i < size; i++) { // input_z[size][couche->depth][couche->dim][couche->dim]
network_cp->input_z[i] = (float***)malloc(sizeof(float**)*network->depth[i]);
network_cp->input_z[i] = (float***)nalloc(sizeof(float**)*network->depth[i]);
for (int j=0; j < network->depth[i]; j++) {
network_cp->input_z[i][j] = (float**)malloc(sizeof(float*)*network->width[i]);
network_cp->input_z[i][j] = (float**)nalloc(sizeof(float*)*network->width[i]);
for (int k=0; k < network->width[i]; k++) {
network_cp->input_z[i][j][k] = (float*)malloc(sizeof(float)*network->width[i]);
network_cp->input_z[i][j][k] = (float*)nalloc(sizeof(float)*network->width[i]);
for (int l=0; l < network->width[i]; l++) {
network_cp->input_z[i][j][k][l] = 0.;
}
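
Since every malloc() in copy_network() now goes through nalloc(), per-thread working copies of the network live in unified memory and are directly addressable from CUDA kernels. A usage sketch (hypothetical; assumes free_network() releases through gree(), in line with the rest of this commit):

Network* copy = copy_network(network);   /* deep copy in unified memory */
/* ... run forward/backward passes on the copy, CPU- or GPU-side ... */
free_network(copy);                      /* must release via gree()     */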

@ -1,3 +1,9 @@
#include <stdio.h>
#include <stdbool.h>
#ifdef USE_CUDA
#include "cuda_runtime.h"
#endif

#ifndef DEF_UTILS_CU_H
#define DEF_UTILS_CU_H

@ -22,4 +28,8 @@ int i_div_up(int a, int b);
*/
bool check_cuda_compatibility();


void* nalloc(size_t sz);

void gree(void* ptr);
#endif
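
The nalloc() and gree() declared here are the project-wide drop-in replacements for malloc() and free(): plain heap allocation in CPU-only builds, CUDA unified memory when USE_CUDA is set. A minimal caller sketch (make_buffer is illustrative, not part of the commit):

#include "include/utils.h"

float* make_buffer(int n) {
    float* buf = (float*)nalloc(sizeof(float) * n);
    if (!buf) return NULL;   /* defensive NULL check */
    for (int i = 0; i < n; i++) {
        buf[i] = 0.f;        /* the host can write unified memory directly */
    }
    return buf;              /* caller releases with gree(buf) */
}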
66
src/utils.c
Normal file
@ -0,0 +1,66 @@
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>

#ifdef USE_CUDA
#include "cuda_runtime.h"
#endif
#include "include/utils.h"
#include "include/colors.h"


int i_div_up(int a, int b) { // Partie entière supérieure de a/b
return ((a % b) != 0) ? (a / b + 1) : (a / b);
}

bool check_cuda_compatibility() {
#ifdef __CUDACC__
int nDevices;
cudaDeviceProp prop;

cudaGetDeviceCount(&nDevices);
if (nDevices == 0) {
printf("Pas d'utilisation du GPU\n\n");
return false;
}

printf("GPUs disponibles:\n");

for (int i=0; i < nDevices; i++) {
cudaGetDeviceProperties(&prop, i);
printf(" - %s\n", prop.name);
}

cudaGetDeviceProperties(&prop, 0);
printf("Utilisation du GPU: " BLUE "%s" RESET " (Compute capability: %d.%d)\n\n", prop.name, prop.major, prop.minor);
return true;
#else
printf("Pas d'utilisation du GPU\n\n");
return false;
#endif
}

#ifndef USE_CUDA

void* nalloc(size_t sz) {
void* ptr = malloc(sz);
return ptr;
}

void gree(void* ptr) {
free(ptr);
}

#else

void* nalloc(size_t sz) {
void* ptr;
cudaMallocManaged(&ptr, sz, cudaMemAttachHost);
return ptr;
}

void gree(void* ptr) {
cudaFree(ptr);
}

#endif
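
One caveat in both branches above: the cudaError_t from cudaMallocManaged() (and a NULL from malloc()) is silently ignored. A hardened wrapper could look like this (nalloc_checked is hypothetical, not part of the commit; includes mirror those of utils.c):

void* nalloc_checked(size_t sz) {
#ifdef USE_CUDA
    void* ptr = NULL;
    cudaError_t err = cudaMallocManaged(&ptr, sz, cudaMemAttachHost);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMallocManaged(%zu): %s\n", sz, cudaGetErrorString(err));
        return NULL;
    }
    return ptr;
#else
    void* ptr = malloc(sz);
    if (!ptr) {
        fprintf(stderr, "malloc(%zu) failed\n", sz);
    }
    return ptr;
#endif
}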
27
src/utils.cu
@ -34,4 +34,29 @@ bool check_cuda_compatibility() {
printf("Pas d'utilisation du GPU\n\n");
return false;
#endif
}

#ifndef __CUDACC__

void* nalloc(size_t sz) {
void* ptr = malloc(sz);
return ptr;
}

void gree(void* ptr) {
free(ptr);
}

#else

void* nalloc(size_t sz) {
void* ptr;
cudaMallocManaged(&ptr, sz, cudaMemAttachHost);
return ptr;
}

void gree(void* ptr) {
cudaFree(ptr);
}

#endif

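Both implementations pass cudaMemAttachHost, so a fresh allocation starts out visible to the host only; on devices without full concurrent managed access it must be attached to a stream before a kernel touches it. A sketch of that step (hypothetical; `stream` and `buf` are illustrative, not part of the commit):

cudaStream_t stream;
cudaStreamCreate(&stream);

float* buf = (float*)nalloc(1024 * sizeof(float));   /* host-attached */
cudaStreamAttachMemAsync(stream, buf, 0, cudaMemAttachSingle);
cudaStreamSynchronize(stream);   /* kernels on `stream` may now access buf */
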
@ -41,11 +41,11 @@ void print_matrix(float** mat, int n, int p) {


float*** create_matrix(int n, int p, int q, float max_val) {
float*** matrix = (float***)malloc(n*sizeof(float**));
float*** matrix = (float***)nalloc(n*sizeof(float**));
for (int i=0; i < n; i++) {
matrix[i] = (float**)malloc(sizeof(float*)*p);
matrix[i] = (float**)nalloc(sizeof(float*)*p);
for (int j=0; j < p; j++) {
matrix[i][j] = (float*)malloc(sizeof(float)*q);
matrix[i][j] = (float*)nalloc(sizeof(float)*q);
}
}

@ -55,11 +55,11 @@ float*** create_matrix(int n, int p, int q, float max_val) {


float*** create_empty_matrix(int n, int p, int q) {
float*** matrix = (float***)malloc(n*sizeof(float**));
float*** matrix = (float***)nalloc(n*sizeof(float**));
for (int i=0; i < n; i++) {
matrix[i] = (float**)malloc(sizeof(float*)*p);
matrix[i] = (float**)nalloc(sizeof(float*)*p);
for (int j=0; j < p; j++) {
matrix[i][j] = (float*)malloc(sizeof(float)*q);
matrix[i][j] = (float*)nalloc(sizeof(float)*q);
for (int k=0; k < q; k++) {
matrix[i][j][k] = 0.;
}
@ -71,11 +71,11 @@ float*** create_empty_matrix(int n, int p, int q) {
void free_matrix(float*** matrix, int n, int p) {
for (int i=0; i < n; i++) {
for (int j=0; j < p; j++) {
free(matrix[i][j]);
gree(matrix[i][j]);
}
free(matrix[i]);
gree(matrix[i]);
}
free(matrix);
gree(matrix);
}

bool check_matrices_equality(float*** m1, float*** m2, int n, int p, int q, int acceptation) {
@ -97,7 +97,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
int k_size = input_dim - output_dim +1;

// Génération des données aléatoires
Kernel_cnn* kernel = (Kernel_cnn*)malloc(sizeof(Kernel_cnn));
Kernel_cnn* kernel = (Kernel_cnn*)nalloc(sizeof(Kernel_cnn));

kernel->k_size = k_size;
kernel->rows = rows;
@ -108,8 +108,8 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
kernel->d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f);

// w[rows][columns][k_size][k_size]
kernel->w = (float****)malloc(sizeof(float***)*kernel->rows);
kernel->d_w = (float****)malloc(sizeof(float***)*kernel->rows);
kernel->w = (float****)nalloc(sizeof(float***)*kernel->rows);
kernel->d_w = (float****)nalloc(sizeof(float***)*kernel->rows);
for (int i=0; i < kernel->rows; i++) {
kernel->w[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 15.0f);
kernel->d_w[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
@ -156,8 +156,8 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
free_matrix(kernel->w[i], kernel->columns, kernel->k_size);
free_matrix(kernel->d_w[i], kernel->columns, kernel->k_size);
}
free(kernel->w);
free(kernel->d_w);
gree(kernel->w);
gree(kernel->d_w);

free_matrix(input, kernel->rows, input_dim);
free_matrix(output_cpu, kernel->columns, output_dim);

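Because create_matrix() and free_matrix() now route through nalloc()/gree(), the same test buffers are valid for both the CPU convolution and the CUDA implementation it is compared against. Sketch (hypothetical dimensions, not part of the commit):

float*** input = create_matrix(3, 32, 32, 255.0f);   /* unified memory      */
/* ... run both convolution implementations against `input` ... */
free_matrix(input, 3, 32);                           /* releases via gree() */
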
@ -37,9 +37,9 @@ void print_matrix(float** mat, int n, int p) {


float** create_matrix(int n, int p) {
float** matrix = (float**)malloc(n*sizeof(float*));
float** matrix = (float**)nalloc(n*sizeof(float*));
for (int i=0; i < n; i++) {
matrix[i] = (float*)malloc(sizeof(float)*p);
matrix[i] = (float*)nalloc(sizeof(float)*p);
}

fill_matrix_random(matrix, n, p);
@ -48,9 +48,9 @@ float** create_matrix(int n, int p) {


float** create_empty_matrix(int n, int p) {
float** matrix = (float**)malloc(n*sizeof(float*));
float** matrix = (float**)nalloc(n*sizeof(float*));
for (int i=0; i < n; i++) {
matrix[i] = (float*)malloc(p*sizeof(float));
matrix[i] = (float*)nalloc(p*sizeof(float));
for (int j=0; j < p; j++) {
matrix[i][j] = 0.;
}
@ -103,24 +103,24 @@ void run_matrices_test(int n, int p, int q) {

// On libère l'espace mémoire alloué
for (int i=0; i < n; i++) {
free(matrix1[i]);
gree(matrix1[i]);
}
free(matrix1);
gree(matrix1);

for (int i=0; i < p; i++) {
free(matrix2[i]);
gree(matrix2[i]);
}
free(matrix2);
gree(matrix2);

for (int i=0; i < n; i++) {
free(result_cpu[i]);
gree(result_cpu[i]);
}
free(result_cpu);
gree(result_cpu);

for (int i=0; i < n; i++) {
free(result_gpu[i]);
gree(result_gpu[i]);
}
free(result_gpu);
gree(result_gpu);
}