diff --git a/Makefile b/Makefile index c904d8b..0d66068 100644 --- a/Makefile +++ b/Makefile @@ -70,7 +70,7 @@ $(BUILDDIR)/dense_%.o: $(DENSE_SRCDIR)/%.c $(DENSE_SRCDIR)/include/%.h # # Build cnn # -cnn: $(BUILDDIR)/cnn-main $(BUILDDIR)/cnn-main-cuda $(BUILDDIR)/cnn-preview $(BUILDDIR)/cnn-export; +cnn: $(BUILDDIR)/cnn-main $(BUILDDIR)/cnn-main-cuda $(BUILDDIR)/cnn-preview $(BUILDDIR)/cnn-export $(BUILDDIR)/cnn-export-cuda; $(BUILDDIR)/cnn-main: $(CNN_SRCDIR)/main.c \ $(BUILDDIR)/cnn_backpropagation.o \ @@ -147,6 +147,28 @@ $(BUILDDIR)/cnn-export: $(CNN_SRCDIR)/export.c \ $(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS) +ifdef NVCC_INSTALLED +$(BUILDDIR)/cnn-export-cuda: $(CNN_SRCDIR)/export.c \ + $(BUILDDIR)/cnn_cuda_backpropagation.o \ + $(BUILDDIR)/cnn_cuda_convolution.o \ + $(BUILDDIR)/cnn_neuron_io.cuda.o \ + $(BUILDDIR)/cnn_cuda_function.o \ + $(BUILDDIR)/cnn_free.cuda.o \ + $(BUILDDIR)/cnn_cuda_make.o \ + $(BUILDDIR)/cnn_cnn.cuda.o \ + $(BUILDDIR)/cnn_utils.cuda.o \ + $(BUILDDIR)/cnn_jpeg.cuda.o \ + \ + $(BUILDDIR)/cuda_memory_management.o \ + $(BUILDDIR)/colors.cuda.o \ + $(BUILDDIR)/mnist.o \ + $(BUILDDIR)/cuda_utils.o + $(NVCC) $^ -o $@ $(NVCCFLAGS) $(LD_NVCCFLAGS) +else +$(BUILDDIR)/cnn-export-cuda: $(CNN_SRCDIR)/export.c + @echo "$(NVCC) not found, skipping" +endif + $(BUILDDIR)/cnn_%.o: $(CNN_SRCDIR)/%.c $(CNN_SRCDIR)/include/%.h $(CC) -c $< -o $@ $(CFLAGS) diff --git a/src/cnn/export.c b/src/cnn/export.c index d304ab1..f571125 100644 --- a/src/cnn/export.c +++ b/src/cnn/export.c @@ -82,7 +82,7 @@ void print_poids_ker_cnn(char* modele) { void write_image(float** data, int width, int height, char* base_filename, int layer_id, int kernel_id) { - int filename_length = strlen(base_filename) + (int)log10(layer_id+1)+1 + (int)log10(kernel_id+1)+1 + 21; + int filename_length = strlen(base_filename) + (int)log10(layer_id+1)+1 + (int)log10(kernel_id+1)+1 + 21 +1; char* filename = (char*)malloc(sizeof(char)*filename_length); sprintf(filename, "%s_layer-%d_feature-%d.jpeg", 
base_filename, layer_id, kernel_id); diff --git a/src/cnn/make.c b/src/cnn/make.c index a205d39..96102bc 100644 --- a/src/cnn/make.c +++ b/src/cnn/make.c @@ -183,17 +183,17 @@ void make_max_pooling(float*** input, float*** output, int size, int output_dept * Dense */ #ifdef __CUDACC__ -__global__ void make_dense_kernel(Kernel_nn* kernel, float* input, float* output, int size_input, int size_output) { +__global__ void make_dense_kernel(float** weights, float* bias, float* input, float* output, int size_input, int size_output) { // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output if (idx >= size_output) { return; } - float f = kernel->bias[idx]; + float f = bias[idx]; for (int j=0; j < size_input; j++) { - f += kernel->weights[j][idx]*input[j]; + f += weights[j][idx]*input[j]; } output[idx] = f; } @@ -203,7 +203,7 @@ void make_dense_device(Kernel_nn* kernel, float* input, float* output, int size_ dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1); dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z); - make_dense_kernel<<<gridSize, blockSize>>>(kernel, input, output, size_input, size_output); + make_dense_kernel<<<gridSize, blockSize>>>(kernel->weights, kernel->bias, input, output, size_input, size_output); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); } diff --git a/src/cnn/make.cu b/src/cnn/make.cu index a205d39..96102bc 100644 --- a/src/cnn/make.cu +++ b/src/cnn/make.cu @@ -183,17 +183,17 @@ void make_max_pooling(float*** input, float*** output, int size, int output_dept * Dense */ #ifdef __CUDACC__ -__global__ void make_dense_kernel(Kernel_nn* kernel, float* input, float* output, int size_input, int size_output) { +__global__ void make_dense_kernel(float** weights, float* bias, float* input, float* output, int size_input, int size_output) { // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu int idx = threadIdx.x + blockDim.x*blockIdx.x; // < 
size_output if (idx >= size_output) { return; } - float f = kernel->bias[idx]; + float f = bias[idx]; for (int j=0; j < size_input; j++) { - f += kernel->weights[j][idx]*input[j]; + f += weights[j][idx]*input[j]; } output[idx] = f; } @@ -203,7 +203,7 @@ void make_dense_device(Kernel_nn* kernel, float* input, float* output, int size_ dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1); dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z); - make_dense_kernel<<<gridSize, blockSize>>>(kernel, input, output, size_input, size_output); + make_dense_kernel<<<gridSize, blockSize>>>(kernel->weights, kernel->bias, input, output, size_input, size_output); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); }