mirror of
https://github.com/augustin64/projet-tipe
synced 2025-01-23 15:16:26 +01:00
Merge remote-tracking branch 'upstream/main' into Finetuning_deepening
This commit is contained in:
commit
7e90b15671
24
Makefile
24
Makefile
@ -70,7 +70,7 @@ $(BUILDDIR)/dense_%.o: $(DENSE_SRCDIR)/%.c $(DENSE_SRCDIR)/include/%.h
|
||||
#
|
||||
# Build cnn
|
||||
#
|
||||
cnn: $(BUILDDIR)/cnn-main $(BUILDDIR)/cnn-main-cuda $(BUILDDIR)/cnn-preview $(BUILDDIR)/cnn-export;
|
||||
cnn: $(BUILDDIR)/cnn-main $(BUILDDIR)/cnn-main-cuda $(BUILDDIR)/cnn-preview $(BUILDDIR)/cnn-export $(BUILDDIR)/cnn-export-cuda;
|
||||
|
||||
$(BUILDDIR)/cnn-main: $(CNN_SRCDIR)/main.c \
|
||||
$(BUILDDIR)/cnn_backpropagation.o \
|
||||
@ -147,6 +147,28 @@ $(BUILDDIR)/cnn-export: $(CNN_SRCDIR)/export.c \
|
||||
$(CC) $^ -o $@ $(CFLAGS) $(LD_CFLAGS)
|
||||
|
||||
|
||||
ifdef NVCC_INSTALLED
|
||||
$(BUILDDIR)/cnn-export-cuda: $(CNN_SRCDIR)/export.c \
|
||||
$(BUILDDIR)/cnn_cuda_backpropagation.o \
|
||||
$(BUILDDIR)/cnn_cuda_convolution.o \
|
||||
$(BUILDDIR)/cnn_neuron_io.cuda.o \
|
||||
$(BUILDDIR)/cnn_cuda_function.o \
|
||||
$(BUILDDIR)/cnn_free.cuda.o \
|
||||
$(BUILDDIR)/cnn_cuda_make.o \
|
||||
$(BUILDDIR)/cnn_cnn.cuda.o \
|
||||
$(BUILDDIR)/cnn_utils.cuda.o \
|
||||
$(BUILDDIR)/cnn_jpeg.cuda.o \
|
||||
\
|
||||
$(BUILDDIR)/cuda_memory_management.o \
|
||||
$(BUILDDIR)/colors.cuda.o \
|
||||
$(BUILDDIR)/mnist.o \
|
||||
$(BUILDDIR)/cuda_utils.o
|
||||
$(NVCC) $^ -o $@ $(NVCCFLAGS) $(LD_NVCCFLAGS)
|
||||
else
|
||||
$(BUILDDIR)/cnn-export-cuda: $(CNN_SRCDIR)/export.c
|
||||
@echo "$(NVCC) not found, skipping"
|
||||
endif
|
||||
|
||||
$(BUILDDIR)/cnn_%.o: $(CNN_SRCDIR)/%.c $(CNN_SRCDIR)/include/%.h
|
||||
$(CC) -c $< -o $@ $(CFLAGS)
|
||||
|
||||
|
@ -82,7 +82,7 @@ void print_poids_ker_cnn(char* modele) {
|
||||
|
||||
|
||||
void write_image(float** data, int width, int height, char* base_filename, int layer_id, int kernel_id) {
|
||||
int filename_length = strlen(base_filename) + (int)log10(layer_id+1)+1 + (int)log10(kernel_id+1)+1 + 21;
|
||||
int filename_length = strlen(base_filename) + (int)log10(layer_id+1)+1 + (int)log10(kernel_id+1)+1 + 21 +1;
|
||||
char* filename = (char*)malloc(sizeof(char)*filename_length);
|
||||
|
||||
sprintf(filename, "%s_layer-%d_feature-%d.jpeg", base_filename, layer_id, kernel_id);
|
||||
|
@ -183,17 +183,17 @@ void make_max_pooling(float*** input, float*** output, int size, int output_dept
|
||||
* Dense
|
||||
*/
|
||||
#ifdef __CUDACC__
|
||||
__global__ void make_dense_kernel(Kernel_nn* kernel, float* input, float* output, int size_input, int size_output) {
|
||||
__global__ void make_dense_kernel(float** weights, float* bias, float* input, float* output, int size_input, int size_output) {
|
||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output
|
||||
|
||||
if (idx >= size_output) {
|
||||
return;
|
||||
}
|
||||
float f = kernel->bias[idx];
|
||||
float f = bias[idx];
|
||||
|
||||
for (int j=0; j < size_input; j++) {
|
||||
f += kernel->weights[j][idx]*input[j];
|
||||
f += weights[j][idx]*input[j];
|
||||
}
|
||||
output[idx] = f;
|
||||
}
|
||||
@ -203,7 +203,7 @@ void make_dense_device(Kernel_nn* kernel, float* input, float* output, int size_
|
||||
dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1);
|
||||
dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z);
|
||||
|
||||
make_dense_kernel<<<gridSize, blockSize>>>(kernel, input, output, size_input, size_output);
|
||||
make_dense_kernel<<<gridSize, blockSize>>>(kernel->weights, kernel->bias, input, output, size_input, size_output);
|
||||
gpuErrchk( cudaPeekAtLastError() );
|
||||
gpuErrchk( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
@ -183,17 +183,17 @@ void make_max_pooling(float*** input, float*** output, int size, int output_dept
|
||||
* Dense
|
||||
*/
|
||||
#ifdef __CUDACC__
|
||||
__global__ void make_dense_kernel(Kernel_nn* kernel, float* input, float* output, int size_input, int size_output) {
|
||||
__global__ void make_dense_kernel(float** weights, float* bias, float* input, float* output, int size_input, int size_output) {
|
||||
// Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output
|
||||
|
||||
if (idx >= size_output) {
|
||||
return;
|
||||
}
|
||||
float f = kernel->bias[idx];
|
||||
float f = bias[idx];
|
||||
|
||||
for (int j=0; j < size_input; j++) {
|
||||
f += kernel->weights[j][idx]*input[j];
|
||||
f += weights[j][idx]*input[j];
|
||||
}
|
||||
output[idx] = f;
|
||||
}
|
||||
@ -203,7 +203,7 @@ void make_dense_device(Kernel_nn* kernel, float* input, float* output, int size_
|
||||
dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1);
|
||||
dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z);
|
||||
|
||||
make_dense_kernel<<<gridSize, blockSize>>>(kernel, input, output, size_input, size_output);
|
||||
make_dense_kernel<<<gridSize, blockSize>>>(kernel->weights, kernel->bias, input, output, size_input, size_output);
|
||||
gpuErrchk( cudaPeekAtLastError() );
|
||||
gpuErrchk( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user