diff --git a/src/cnn/creation.c b/src/cnn/creation.c index d3a7af5..7305602 100644 --- a/src/cnn/creation.c +++ b/src/cnn/creation.c @@ -12,19 +12,19 @@ Network* create_network(int max_size, float learning_rate, int dropout, int init if (dropout < 0 || dropout > 100) { printf("Erreur, la probabilité de dropout n'est pas respecté, elle doit être comprise entre 0 et 100\n"); } - Network* network = (Network*)nalloc(sizeof(Network)); + Network* network = (Network*)nalloc(1, sizeof(Network)); network->learning_rate = learning_rate; network->max_size = max_size; network->dropout = dropout; network->initialisation = initialisation; network->size = 1; - network->input = (float****)nalloc(sizeof(float***)*max_size); - network->input_z = (float****)nalloc(sizeof(float***)*max_size); - network->kernel = (Kernel**)nalloc(sizeof(Kernel*)*(max_size-1)); - network->width = (int*)nalloc(sizeof(int*)*max_size); - network->depth = (int*)nalloc(sizeof(int*)*max_size); + network->input = (float****)nalloc(max_size, sizeof(float***)); + network->input_z = (float****)nalloc(max_size, sizeof(float***)); + network->kernel = (Kernel**)nalloc(max_size-1, sizeof(Kernel*)); + network->width = (int*)nalloc(max_size, sizeof(int*)); + network->depth = (int*)nalloc(max_size, sizeof(int*)); for (int i=0; i < max_size-1; i++) { - network->kernel[i] = (Kernel*)nalloc(sizeof(Kernel)); + network->kernel[i] = (Kernel*)nalloc(1, sizeof(Kernel)); } network->kernel[0]->linearisation = 0; network->width[0] = input_dim; @@ -58,11 +58,11 @@ Network* create_simple_one(float learning_rate, int dropout, int activation, int } void create_a_cube_input_layer(Network* network, int pos, int depth, int dim) { - network->input[pos] = (float***)nalloc(sizeof(float**)*depth); + network->input[pos] = (float***)nalloc(depth, sizeof(float**)); for (int i=0; i < depth; i++) { - network->input[pos][i] = (float**)nalloc(sizeof(float*)*dim); + network->input[pos][i] = (float**)nalloc(dim, sizeof(float*)); for (int j=0; j < dim; j++) { - network->input[pos][i][j] = (float*)nalloc(sizeof(float)*dim); + network->input[pos][i][j] = (float*)nalloc(dim, sizeof(float)); } } network->width[pos] = dim; @@ -70,11 +70,11 @@ void create_a_cube_input_layer(Network* network, int pos, int depth, int dim) { } void create_a_cube_input_z_layer(Network* network, int pos, int depth, int dim) { - network->input_z[pos] = (float***)nalloc(sizeof(float**)*depth); + network->input_z[pos] = (float***)nalloc(depth, sizeof(float**)); for (int i=0; i < depth; i++) { - network->input_z[pos][i] = (float**)nalloc(sizeof(float*)*dim); + network->input_z[pos][i] = (float**)nalloc(dim, sizeof(float*)); for (int j=0; j < dim; j++) { - network->input_z[pos][i][j] = (float*)nalloc(sizeof(float)*dim); + network->input_z[pos][i][j] = (float*)nalloc(dim, sizeof(float)); } } network->width[pos] = dim; @@ -82,17 +82,17 @@ void create_a_cube_input_z_layer(Network* network, int pos, int depth, int dim) } void create_a_line_input_layer(Network* network, int pos, int dim) { - network->input[pos] = (float***)nalloc(sizeof(float**)); - network->input[pos][0] = (float**)nalloc(sizeof(float*)); - network->input[pos][0][0] = (float*)nalloc(sizeof(float)*dim); + network->input[pos] = (float***)nalloc(1, sizeof(float**)); + network->input[pos][0] = (float**)nalloc(1, sizeof(float*)); + network->input[pos][0][0] = (float*)nalloc(dim, sizeof(float)); network->width[pos] = dim; network->depth[pos] = 1; } void create_a_line_input_z_layer(Network* network, int pos, int dim) { - network->input_z[pos] = 
(float***)nalloc(sizeof(float**)); - network->input_z[pos][0] = (float**)nalloc(sizeof(float*)); - network->input_z[pos][0][0] = (float*)nalloc(sizeof(float)*dim); + network->input_z[pos] = (float***)nalloc(1, sizeof(float**)); + network->input_z[pos][0] = (float**)nalloc(1, sizeof(float*)); + network->input_z[pos][0][0] = (float*)nalloc(dim, sizeof(float)); network->width[pos] = dim; network->depth[pos] = 1; } @@ -157,37 +157,37 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act network->kernel[k_pos]->activation = activation; network->kernel[k_pos]->linearisation = 0; network->kernel[k_pos]->pooling = 0; - network->kernel[k_pos]->cnn = (Kernel_cnn*)nalloc(sizeof(Kernel_cnn)); + network->kernel[k_pos]->cnn = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn)); Kernel_cnn* cnn = network->kernel[k_pos]->cnn; cnn->k_size = kernel_size; cnn->rows = depth_input; cnn->columns = depth_output; - cnn->weights = (float****)nalloc(sizeof(float***)*depth_input); - cnn->d_weights = (float****)nalloc(sizeof(float***)*depth_input); + cnn->weights = (float****)nalloc(depth_input, sizeof(float***)); + cnn->d_weights = (float****)nalloc(depth_input, sizeof(float***)); for (int i=0; i < depth_input; i++) { - cnn->weights[i] = (float***)nalloc(sizeof(float**)*depth_output); - cnn->d_weights[i] = (float***)nalloc(sizeof(float**)*depth_output); + cnn->weights[i] = (float***)nalloc(depth_output, sizeof(float**)); + cnn->d_weights[i] = (float***)nalloc(depth_output, sizeof(float**)); for (int j=0; j < depth_output; j++) { - cnn->weights[i][j] = (float**)nalloc(sizeof(float*)*kernel_size); - cnn->d_weights[i][j] = (float**)nalloc(sizeof(float*)*kernel_size); + cnn->weights[i][j] = (float**)nalloc(kernel_size, sizeof(float*)); + cnn->d_weights[i][j] = (float**)nalloc(kernel_size, sizeof(float*)); for (int k=0; k < kernel_size; k++) { - cnn->weights[i][j][k] = (float*)nalloc(sizeof(float)*kernel_size); - cnn->d_weights[i][j][k] = (float*)nalloc(sizeof(float)*kernel_size); + cnn->weights[i][j][k] = (float*)nalloc(kernel_size, sizeof(float)); + cnn->d_weights[i][j][k] = (float*)nalloc(kernel_size, sizeof(float)); for (int l=0; l < kernel_size; l++) { cnn->d_weights[i][j][k][l] = 0.; } } } } - cnn->bias = (float***)nalloc(sizeof(float**)*depth_output); - cnn->d_bias = (float***)nalloc(sizeof(float**)*depth_output); + cnn->bias = (float***)nalloc(depth_output, sizeof(float**)); + cnn->d_bias = (float***)nalloc(depth_output, sizeof(float**)); for (int i=0; i < depth_output; i++) { - cnn->bias[i] = (float**)nalloc(sizeof(float*)*bias_size); - cnn->d_bias[i] = (float**)nalloc(sizeof(float*)*bias_size); + cnn->bias[i] = (float**)nalloc(bias_size, sizeof(float*)); + cnn->d_bias[i] = (float**)nalloc(bias_size, sizeof(float*)); for (int j=0; j < bias_size; j++) { - cnn->bias[i][j] = (float*)nalloc(sizeof(float)*bias_size); - cnn->d_bias[i][j] = (float*)nalloc(sizeof(float)*bias_size); + cnn->bias[i][j] = (float*)nalloc(bias_size, sizeof(float)); + cnn->d_bias[i][j] = (float*)nalloc(bias_size, sizeof(float)); for (int k=0; k < bias_size; k++) { cnn->d_bias[i][j][k] = 0.; } @@ -211,24 +211,24 @@ void add_dense(Network* network, int size_output, int activation) { return; } network->kernel[k_pos]->cnn = NULL; - network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(sizeof(Kernel_nn)); + network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn)); Kernel_nn* nn = network->kernel[k_pos]->nn; network->kernel[k_pos]->activation = activation; network->kernel[k_pos]->linearisation = 0; 
network->kernel[k_pos]->pooling = 0; nn->size_input = size_input; nn->size_output = size_output; - nn->bias = (float*)nalloc(sizeof(float)*size_output); - nn->d_bias = (float*)nalloc(sizeof(float)*size_output); + nn->bias = (float*)nalloc(size_output, sizeof(float)); + nn->d_bias = (float*)nalloc(size_output, sizeof(float)); for (int i=0; i < size_output; i++) { nn->d_bias[i] = 0.; } - nn->weights = (float**)nalloc(sizeof(float*)*size_input); - nn->d_weights = (float**)nalloc(sizeof(float*)*size_input); + nn->weights = (float**)nalloc(size_input, sizeof(float*)); + nn->d_weights = (float**)nalloc(size_input, sizeof(float*)); for (int i=0; i < size_input; i++) { - nn->weights[i] = (float*)nalloc(sizeof(float)*size_output); - nn->d_weights[i] = (float*)nalloc(sizeof(float)*size_output); + nn->weights[i] = (float*)nalloc(size_output, sizeof(float)); + nn->d_weights[i] = (float*)nalloc(size_output, sizeof(float)); for (int j=0; j < size_output; j++) { nn->d_weights[i][j] = 0.; } @@ -252,7 +252,7 @@ void add_dense_linearisation(Network* network, int size_output, int activation) return; } network->kernel[k_pos]->cnn = NULL; - network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(sizeof(Kernel_nn)); + network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn)); Kernel_nn* nn = network->kernel[k_pos]->nn; network->kernel[k_pos]->activation = activation; network->kernel[k_pos]->linearisation = 1; @@ -260,16 +260,16 @@ void add_dense_linearisation(Network* network, int size_output, int activation) nn->size_input = size_input; nn->size_output = size_output; - nn->bias = (float*)nalloc(sizeof(float)*size_output); - nn->d_bias = (float*)nalloc(sizeof(float)*size_output); + nn->bias = (float*)nalloc(size_output, sizeof(float)); + nn->d_bias = (float*)nalloc(size_output, sizeof(float)); for (int i=0; i < size_output; i++) { nn->d_bias[i] = 0.; } - nn->weights = (float**)nalloc(sizeof(float*)*size_input); - nn->d_weights = (float**)nalloc(sizeof(float*)*size_input); + nn->weights = (float**)nalloc(size_input, sizeof(float*)); + nn->d_weights = (float**)nalloc(size_input, sizeof(float*)); for (int i=0; i < size_input; i++) { - nn->weights[i] = (float*)nalloc(sizeof(float)*size_output); - nn->d_weights[i] = (float*)nalloc(sizeof(float)*size_output); + nn->weights[i] = (float*)nalloc(size_output, sizeof(float)); + nn->d_weights[i] = (float*)nalloc(size_output, sizeof(float)); for (int j=0; j < size_output; j++) { nn->d_weights[i][j] = 0.; } diff --git a/src/cnn/make.c b/src/cnn/make.c index b783808..0d62d5b 100644 --- a/src/cnn/make.c +++ b/src/cnn/make.c @@ -234,7 +234,7 @@ void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input, * Dense linearised */ #ifdef __CUDACC__ -__global__ void make_dense_linearised_kernel(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { +__global__ void make_dense_linearised_kernel(float** weights, float*** input, float* output, int depth_input, int dim_input, int size_output) { // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output @@ -246,7 +246,7 @@ __global__ void make_dense_linearised_kernel(Kernel_nn* kernel, float*** input, for (int i=0; i < depth_input; i++) { for (int j=0; j < dim_input; j++) { for (int k=0; k < dim_input; k++) { - f += input[i][j][k]*kernel->weights[k + j*dim_input + i*depth_input][idx]; + f += input[i][j][k]*weights[k + j*dim_input + i*depth_input][idx]; } } } @@ -258,7 +258,7 @@ void 
make_dense_linearised_device(Kernel_nn* kernel, float*** input, float* outp dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1); dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z); - make_dense_linearised_kernel<<<gridSize, blockSize>>>(kernel, input, output, depth_input, dim_input, size_output); + make_dense_linearised_kernel<<<gridSize, blockSize>>>(kernel->weights, input, output, depth_input, dim_input, size_output); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); } diff --git a/src/cnn/make.cu b/src/cnn/make.cu index b783808..0d62d5b 100644 --- a/src/cnn/make.cu +++ b/src/cnn/make.cu @@ -234,7 +234,7 @@ void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input, * Dense linearised */ #ifdef __CUDACC__ -__global__ void make_dense_linearised_kernel(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) { +__global__ void make_dense_linearised_kernel(float** weights, float*** input, float* output, int depth_input, int dim_input, int size_output) { // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output @@ -246,7 +246,7 @@ __global__ void make_dense_linearised_kernel(Kernel_nn* kernel, float*** input, for (int i=0; i < depth_input; i++) { for (int j=0; j < dim_input; j++) { for (int k=0; k < dim_input; k++) { - f += input[i][j][k]*kernel->weights[k + j*dim_input + i*depth_input][idx]; + f += input[i][j][k]*weights[k + j*dim_input + i*depth_input][idx]; } } } @@ -258,7 +258,7 @@ void make_dense_linearised_device(Kernel_nn* kernel, float*** input, float* outp dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1); dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z); - make_dense_linearised_kernel<<<gridSize, blockSize>>>(kernel, input, output, depth_input, dim_input, size_output); + make_dense_linearised_kernel<<<gridSize, blockSize>>>(kernel->weights, input, output, depth_input, dim_input, size_output); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); } diff --git a/src/cnn/neuron_io.c b/src/cnn/neuron_io.c index 6102fda..7fc9ab9 100644 --- a/src/cnn/neuron_io.c +++ b/src/cnn/neuron_io.c @@ -141,7 +141,7 @@ void write_couche(Network* network, int indice_couche, int type_couche, FILE* pt Network* read_network(char* filename) { FILE *ptr; - Network* network = (Network*)nalloc(sizeof(Network)); + Network* network = (Network*)nalloc(1, sizeof(Network)); ptr = fopen(filename, "rb"); @@ -167,8 +167,8 @@ Network* read_network(char* filename) { network->dropout = dropout; // Lecture de la taille de l'entrée des différentes matrices - network->width = (int*)nalloc(sizeof(int)*size); - network->depth = (int*)nalloc(sizeof(int)*size); + network->width = (int*)nalloc(size, sizeof(int)); + network->depth = (int*)nalloc(size, sizeof(int)); for (int i=0; i < (int)size; i++) { fread(&tmp, sizeof(uint32_t), 1, ptr); @@ -186,19 +186,19 @@ Network* read_network(char* filename) { } // Lecture de chaque couche - network->kernel = (Kernel**)nalloc(sizeof(Kernel*)*(size-1)); + network->kernel = (Kernel**)nalloc(size-1, sizeof(Kernel*)); for (int i=0; i < (int)size-1; i++) { network->kernel[i] = read_kernel(type_couche[i], network->width[i+1], ptr); } - network->input = (float****)nalloc(sizeof(float***)*size); + network->input = (float****)nalloc(size, sizeof(float***)); for (int i=0; i < (int)size; i++) { // input[size][couche->depth][couche->dim][couche->dim] - network->input[i] = (float***)nalloc(sizeof(float**)*network->depth[i]); + network->input[i] =
(float***)nalloc(network->depth[i], sizeof(float**)); for (int j=0; j < network->depth[i]; j++) { - network->input[i][j] = (float**)nalloc(sizeof(float*)*network->width[i]); + network->input[i][j] = (float**)nalloc(network->width[i], sizeof(float*)); for (int k=0; k < network->width[i]; k++) { - network->input[i][j][k] = (float*)nalloc(sizeof(float)*network->width[i]); + network->input[i][j][k] = (float*)nalloc(network->width[i], sizeof(float)); for (int l=0; l < network->width[i]; l++) { network->input[i][j][k][l] = 0.; } @@ -206,13 +206,13 @@ Network* read_network(char* filename) { } } - network->input_z = (float****)nalloc(sizeof(float***)*size); + network->input_z = (float****)nalloc(size, sizeof(float***)); for (int i=0; i < (int)size; i++) { // input[size][couche->depth][couche->dim][couche->dim] - network->input_z[i] = (float***)nalloc(sizeof(float**)*network->depth[i]); + network->input_z[i] = (float***)nalloc(network->depth[i], sizeof(float**)); for (int j=0; j < network->depth[i]; j++) { - network->input_z[i][j] = (float**)nalloc(sizeof(float*)*network->width[i]); + network->input_z[i][j] = (float**)nalloc(network->width[i], sizeof(float*)); for (int k=0; k < network->width[i]; k++) { - network->input_z[i][j][k] = (float*)nalloc(sizeof(float)*network->width[i]); + network->input_z[i][j][k] = (float*)nalloc(network->width[i], sizeof(float)); for (int l=0; l < network->width[i]; l++) { network->input_z[i][j][k][l] = 0.; } @@ -225,10 +225,10 @@ Network* read_network(char* filename) { } Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) { - Kernel* kernel = (Kernel*)nalloc(sizeof(Kernel)); + Kernel* kernel = (Kernel*)nalloc(1, sizeof(Kernel)); if (type_couche == 0) { // Cas du CNN // Lecture du "Pré-corps" - kernel->cnn = (Kernel_cnn*)nalloc(sizeof(Kernel_cnn)); + kernel->cnn = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn)); kernel->nn = NULL; uint32_t buffer[5]; fread(&buffer, sizeof(buffer), 1, ptr); @@ -243,14 +243,14 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) { Kernel_cnn* cnn = kernel->cnn; float tmp; - cnn->bias = (float***)nalloc(sizeof(float**)*cnn->columns); - cnn->d_bias = (float***)nalloc(sizeof(float**)*cnn->columns); + cnn->bias = (float***)nalloc(cnn->columns, sizeof(float**)); + cnn->d_bias = (float***)nalloc(cnn->columns, sizeof(float**)); for (int i=0; i < cnn->columns; i++) { - cnn->bias[i] = (float**)nalloc(sizeof(float*)*output_dim); - cnn->d_bias[i] = (float**)nalloc(sizeof(float*)*output_dim); + cnn->bias[i] = (float**)nalloc(output_dim, sizeof(float*)); + cnn->d_bias[i] = (float**)nalloc(output_dim, sizeof(float*)); for (int j=0; j < output_dim; j++) { - cnn->bias[i][j] = (float*)nalloc(sizeof(float)*output_dim); - cnn->d_bias[i][j] = (float*)nalloc(sizeof(float)*output_dim); + cnn->bias[i][j] = (float*)nalloc(output_dim, sizeof(float)); + cnn->d_bias[i][j] = (float*)nalloc(output_dim, sizeof(float)); for (int k=0; k < output_dim; k++) { fread(&tmp, sizeof(tmp), 1, ptr); cnn->bias[i][j][k] = tmp; @@ -259,17 +259,17 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) { } } - cnn->weights = (float****)nalloc(sizeof(float***)*cnn->rows); - cnn->d_weights = (float****)nalloc(sizeof(float***)*cnn->rows); + cnn->weights = (float****)nalloc(cnn->rows, sizeof(float***)); + cnn->d_weights = (float****)nalloc(cnn->rows, sizeof(float***)); for (int i=0; i < cnn->rows; i++) { - cnn->weights[i] = (float***)nalloc(sizeof(float**)*cnn->columns); - cnn->d_weights[i] = (float***)nalloc(sizeof(float**)*cnn->columns); + 
cnn->weights[i] = (float***)nalloc(cnn->columns, sizeof(float**)); + cnn->d_weights[i] = (float***)nalloc(cnn->columns, sizeof(float**)); for (int j=0; j < cnn->columns; j++) { - cnn->weights[i][j] = (float**)nalloc(sizeof(float*)*cnn->k_size); - cnn->d_weights[i][j] = (float**)nalloc(sizeof(float*)*cnn->k_size); + cnn->weights[i][j] = (float**)nalloc(cnn->k_size, sizeof(float*)); + cnn->d_weights[i][j] = (float**)nalloc(cnn->k_size, sizeof(float*)); for (int k=0; k < cnn->k_size; k++) { - cnn->weights[i][j][k] = (float*)nalloc(sizeof(float)*cnn->k_size); - cnn->d_weights[i][j][k] = (float*)nalloc(sizeof(float)*cnn->k_size); + cnn->weights[i][j][k] = (float*)nalloc(cnn->k_size, sizeof(float)); + cnn->d_weights[i][j][k] = (float*)nalloc(cnn->k_size, sizeof(float)); for (int l=0; l < cnn->k_size; l++) { fread(&tmp, sizeof(tmp), 1, ptr); cnn->weights[i][j][k][l] = tmp; @@ -280,7 +280,7 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) { } } else if (type_couche == 1) { // Cas du NN // Lecture du "Pré-corps" - kernel->nn = (Kernel_nn*)nalloc(sizeof(Kernel_nn)); + kernel->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn)); kernel->cnn = NULL; uint32_t buffer[4]; fread(&buffer, sizeof(buffer), 1, ptr); @@ -294,19 +294,19 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) { Kernel_nn* nn = kernel->nn; float tmp; - nn->bias = (float*)nalloc(sizeof(float)*nn->size_output); - nn->d_bias = (float*)nalloc(sizeof(float)*nn->size_output); + nn->bias = (float*)nalloc(nn->size_output, sizeof(float)); + nn->d_bias = (float*)nalloc(nn->size_output, sizeof(float)); for (int i=0; i < nn->size_output; i++) { fread(&tmp, sizeof(tmp), 1, ptr); nn->bias[i] = tmp; nn->d_bias[i] = 0.; } - nn->weights = (float**)nalloc(sizeof(float*)*nn->size_input); - nn->d_weights = (float**)nalloc(sizeof(float*)*nn->size_input); + nn->weights = (float**)nalloc(nn->size_input, sizeof(float*)); + nn->d_weights = (float**)nalloc(nn->size_input, sizeof(float*)); for (int i=0; i < nn->size_input; i++) { - nn->weights[i] = (float*)nalloc(sizeof(float)*nn->size_output); - nn->d_weights[i] = (float*)nalloc(sizeof(float)*nn->size_output); + nn->weights[i] = (float*)nalloc(nn->size_output, sizeof(float)); + nn->d_weights[i] = (float*)nalloc(nn->size_output, sizeof(float)); for (int j=0; j < nn->size_output; j++) { fread(&tmp, sizeof(tmp), 1, ptr); nn->weights[i][j] = tmp; diff --git a/src/cnn/utils.c b/src/cnn/utils.c index 81ff515..1a211ba 100644 --- a/src/cnn/utils.c +++ b/src/cnn/utils.c @@ -96,7 +96,7 @@ bool equals_networks(Network* network1, Network* network2) { Network* copy_network(Network* network) { - Network* network_cp = (Network*)nalloc(sizeof(Network)); + Network* network_cp = (Network*)nalloc(1, sizeof(Network)); // Paramètre du réseau int size = network->size; // Paramètres des couches NN @@ -114,17 +114,17 @@ Network* copy_network(Network* network) { copyVar(max_size); copyVar(size); - network_cp->width = (int*)nalloc(sizeof(int)*size); - network_cp->depth = (int*)nalloc(sizeof(int)*size); + network_cp->width = (int*)nalloc(size, sizeof(int)); + network_cp->depth = (int*)nalloc(size, sizeof(int)); for (int i=0; i < size; i++) { copyVar(width[i]); copyVar(depth[i]); } - network_cp->kernel = (Kernel**)nalloc(sizeof(Kernel*)*(size-1)); + network_cp->kernel = (Kernel**)nalloc(size-1, sizeof(Kernel*)); for (int i=0; i < size-1; i++) { - network_cp->kernel[i] = (Kernel*)nalloc(sizeof(Kernel)); + network_cp->kernel[i] = (Kernel*)nalloc(1, sizeof(Kernel)); if (!network->kernel[i]->nn && 
!network->kernel[i]->cnn) { // Cas de la couche de linéarisation copyVar(kernel[i]->pooling); copyVar(kernel[i]->activation); @@ -141,23 +141,23 @@ Network* copy_network(Network* network) { size_output = network->kernel[i]->nn->size_output; network_cp->kernel[i]->cnn = NULL; - network_cp->kernel[i]->nn = (Kernel_nn*)nalloc(sizeof(Kernel_nn)); + network_cp->kernel[i]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn)); copyVar(kernel[i]->nn->size_input); copyVar(kernel[i]->nn->size_output); - network_cp->kernel[i]->nn->bias = (float*)nalloc(sizeof(float)*size_output); - network_cp->kernel[i]->nn->d_bias = (float*)nalloc(sizeof(float)*size_output); + network_cp->kernel[i]->nn->bias = (float*)nalloc(size_output, sizeof(float)); + network_cp->kernel[i]->nn->d_bias = (float*)nalloc(size_output, sizeof(float)); for (int j=0; j < size_output; j++) { copyVar(kernel[i]->nn->bias[j]); network_cp->kernel[i]->nn->d_bias[j] = 0.; } - network_cp->kernel[i]->nn->weights = (float**)nalloc(sizeof(float*)*size_input); - network_cp->kernel[i]->nn->d_weights = (float**)nalloc(sizeof(float*)*size_input); + network_cp->kernel[i]->nn->weights = (float**)nalloc(size_input, sizeof(float*)); + network_cp->kernel[i]->nn->d_weights = (float**)nalloc(size_input, sizeof(float*)); for (int j=0; j < size_input; j++) { - network_cp->kernel[i]->nn->weights[j] = (float*)nalloc(sizeof(float)*size_output); - network_cp->kernel[i]->nn->d_weights[j] = (float*)nalloc(sizeof(float)*size_output); + network_cp->kernel[i]->nn->weights[j] = (float*)nalloc(size_output, sizeof(float)); + network_cp->kernel[i]->nn->d_weights[j] = (float*)nalloc(size_output, sizeof(float)); for (int k=0; k < size_output; k++) { copyVar(kernel[i]->nn->weights[j][k]); network_cp->kernel[i]->nn->d_weights[j][k] = 0.; @@ -176,20 +176,20 @@ Network* copy_network(Network* network) { network_cp->kernel[i]->nn = NULL; - network_cp->kernel[i]->cnn = (Kernel_cnn*)nalloc(sizeof(Kernel_cnn)); + network_cp->kernel[i]->cnn = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn)); copyVar(kernel[i]->cnn->rows); copyVar(kernel[i]->cnn->k_size); copyVar(kernel[i]->cnn->columns); - network_cp->kernel[i]->cnn->bias = (float***)nalloc(sizeof(float**)*columns); - network_cp->kernel[i]->cnn->d_bias = (float***)nalloc(sizeof(float**)*columns); + network_cp->kernel[i]->cnn->bias = (float***)nalloc(columns, sizeof(float**)); + network_cp->kernel[i]->cnn->d_bias = (float***)nalloc(columns, sizeof(float**)); for (int j=0; j < columns; j++) { - network_cp->kernel[i]->cnn->bias[j] = (float**)nalloc(sizeof(float*)*output_dim); - network_cp->kernel[i]->cnn->d_bias[j] = (float**)nalloc(sizeof(float*)*output_dim); + network_cp->kernel[i]->cnn->bias[j] = (float**)nalloc(output_dim, sizeof(float*)); + network_cp->kernel[i]->cnn->d_bias[j] = (float**)nalloc(output_dim, sizeof(float*)); for (int k=0; k < output_dim; k++) { - network_cp->kernel[i]->cnn->bias[j][k] = (float*)nalloc(sizeof(float)*output_dim); - network_cp->kernel[i]->cnn->d_bias[j][k] = (float*)nalloc(sizeof(float)*output_dim); + network_cp->kernel[i]->cnn->bias[j][k] = (float*)nalloc(output_dim, sizeof(float)); + network_cp->kernel[i]->cnn->d_bias[j][k] = (float*)nalloc(output_dim, sizeof(float)); for (int l=0; l < output_dim; l++) { copyVar(kernel[i]->cnn->bias[j][k][l]); network_cp->kernel[i]->cnn->d_bias[j][k][l] = 0.; @@ -197,17 +197,17 @@ Network* copy_network(Network* network) { } } - network_cp->kernel[i]->cnn->weights = (float****)nalloc(sizeof(float***)*rows); - network_cp->kernel[i]->cnn->d_weights = 
(float****)nalloc(sizeof(float***)*rows); + network_cp->kernel[i]->cnn->weights = (float****)nalloc(rows, sizeof(float***)); + network_cp->kernel[i]->cnn->d_weights = (float****)nalloc(rows, sizeof(float***)); for (int j=0; j < rows; j++) { - network_cp->kernel[i]->cnn->weights[j] = (float***)nalloc(sizeof(float**)*columns); - network_cp->kernel[i]->cnn->d_weights[j] = (float***)nalloc(sizeof(float**)*columns); + network_cp->kernel[i]->cnn->weights[j] = (float***)nalloc(columns, sizeof(float**)); + network_cp->kernel[i]->cnn->d_weights[j] = (float***)nalloc(columns, sizeof(float**)); for (int k=0; k < columns; k++) { - network_cp->kernel[i]->cnn->weights[j][k] = (float**)nalloc(sizeof(float*)*k_size); - network_cp->kernel[i]->cnn->d_weights[j][k] = (float**)nalloc(sizeof(float*)*k_size); + network_cp->kernel[i]->cnn->weights[j][k] = (float**)nalloc(k_size, sizeof(float*)); + network_cp->kernel[i]->cnn->d_weights[j][k] = (float**)nalloc(k_size, sizeof(float*)); for (int l=0; l < k_size; l++) { - network_cp->kernel[i]->cnn->weights[j][k][l] = (float*)nalloc(sizeof(float)*k_size); - network_cp->kernel[i]->cnn->d_weights[j][k][l] = (float*)nalloc(sizeof(float)*k_size); + network_cp->kernel[i]->cnn->weights[j][k][l] = (float*)nalloc(k_size, sizeof(float)); + network_cp->kernel[i]->cnn->d_weights[j][k][l] = (float*)nalloc(k_size, sizeof(float)); for (int m=0; m < k_size; m++) { copyVar(kernel[i]->cnn->weights[j][k][l][m]); network_cp->kernel[i]->cnn->d_weights[j][k][l][m] = 0.; @@ -218,13 +218,13 @@ Network* copy_network(Network* network) { } } - network_cp->input = (float****)nalloc(sizeof(float***)*size); + network_cp->input = (float****)nalloc(size, sizeof(float***)); for (int i=0; i < size; i++) { // input[size][couche->depth][couche->dim][couche->dim] - network_cp->input[i] = (float***)nalloc(sizeof(float**)*network->depth[i]); + network_cp->input[i] = (float***)nalloc(network->depth[i], sizeof(float**)); for (int j=0; j < network->depth[i]; j++) { - network_cp->input[i][j] = (float**)nalloc(sizeof(float*)*network->width[i]); + network_cp->input[i][j] = (float**)nalloc(network->width[i], sizeof(float*)); for (int k=0; k < network->width[i]; k++) { - network_cp->input[i][j][k] = (float*)nalloc(sizeof(float)*network->width[i]); + network_cp->input[i][j][k] = (float*)nalloc(network->width[i], sizeof(float)); for (int l=0; l < network->width[i]; l++) { network_cp->input[i][j][k][l] = 0.; } @@ -232,13 +232,13 @@ Network* copy_network(Network* network) { } } - network_cp->input_z = (float****)nalloc(sizeof(float***)*size); + network_cp->input_z = (float****)nalloc(size, sizeof(float***)); for (int i=0; i < size; i++) { // input_z[size][couche->depth][couche->dim][couche->dim] - network_cp->input_z[i] = (float***)nalloc(sizeof(float**)*network->depth[i]); + network_cp->input_z[i] = (float***)nalloc(network->depth[i], sizeof(float**)); for (int j=0; j < network->depth[i]; j++) { - network_cp->input_z[i][j] = (float**)nalloc(sizeof(float*)*network->width[i]); + network_cp->input_z[i][j] = (float**)nalloc(network->width[i], sizeof(float*)); for (int k=0; k < network->width[i]; k++) { - network_cp->input_z[i][j][k] = (float*)nalloc(sizeof(float)*network->width[i]); + network_cp->input_z[i][j][k] = (float*)nalloc(network->width[i], sizeof(float)); for (int l=0; l < network->width[i]; l++) { network_cp->input_z[i][j][k][l] = 0.; } diff --git a/src/include/memory_management.h b/src/include/memory_management.h index 99538f2..008a2b0 100644 --- a/src/include/memory_management.h +++ 
b/src/include/memory_management.h @@ -67,7 +67,7 @@ Memory* create_memory_block(size_t size); /* * Allouer un élément de taille size dans mem */ -void* allocate_memory(size_t size, Memory* mem); +void* allocate_memory(int nb_elements, size_t size, Memory* mem); /* * Essayer de libérer le pointeur représenté par ptr dans mem @@ -80,7 +80,7 @@ extern "C" /* * Alloue de la mémoire partagée CUDA si CUDA est activé */ -void* nalloc(size_t sz); +void* nalloc(int nb_elements, size_t size); #ifdef __CUDACC__ extern "C" diff --git a/src/memory_management.c b/src/memory_management.c index 8d2d52a..b07b3ca 100644 --- a/src/memory_management.c +++ b/src/memory_management.c @@ -69,20 +69,33 @@ Memory* create_memory_block(size_t size) { } -void* allocate_memory(size_t size, Memory* mem) { +void* allocate_memory(int nb_elements, size_t size, Memory* mem) { + /* + * cursor_aligned pointe vers le premier emplacement qui pourrait être utilisé (de manière alignée). + * en effet, la mémoire nécessite d'être alignée avec CUDA: + * https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#device-memory-accesses + */ + void* aligned_cursor = mem->cursor; + #ifdef __CUDACC__ + // Cela devrait être faisable avec opérateurs binaires directement, mais on préfèrera quelque chose de lisible et vérifiable + if (((intptr_t)mem->cursor) %size != 0) { + if (size == 2 || size == 4 || size == 8 || size == 16) + aligned_cursor = (void*)(((intptr_t)mem->cursor) + (size - (((intptr_t)mem->cursor) %size))); + } + #endif // Si il y a suffisamment de mémoire disponible - if (mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start) >= size) { - void* ptr = mem->cursor; - mem->cursor = (void*)((intptr_t)mem->cursor + size); // On décale le curseur de la taille allouée + if (mem->size - ((intptr_t)aligned_cursor - (intptr_t)mem->start) >= nb_elements*size) { + void* ptr = aligned_cursor; + mem->cursor = (void*)((intptr_t)aligned_cursor + nb_elements*size); // On décale le curseur de la taille allouée mem->nb_alloc++; return ptr; } else { - //printf("Mémoire disponible: %ld. Nécessaire: %ld\n", mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start), size); + //printf("Mémoire disponible: %ld. Nécessaire: %ld\n", mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start), nb_elements*size); // Sinon on continue sur l'élément suivant de la liste if (!mem->next) { - mem->next = create_memory_block(MEMORY_BLOCK < size ? size : MEMORY_BLOCK); + mem->next = create_memory_block(MEMORY_BLOCK < nb_elements*size ? nb_elements*size : MEMORY_BLOCK); } - return allocate_memory(size, mem->next); + return allocate_memory(nb_elements, size, mem->next); } } @@ -118,21 +131,21 @@ Memory* free_memory(void* ptr, Memory* mem) { #ifdef __CUDACC__ extern "C" #endif -void* nalloc(size_t sz) { +void* nalloc(int nb_elements, size_t size) { #if defined(__CUDACC__) || defined(TEST_MEMORY_MANAGEMENT) pthread_mutex_lock(&memory_lock); if (!memory) { // We allocate a new memory block - memory = create_memory_block(MEMORY_BLOCK < sz ? sz : MEMORY_BLOCK); + memory = create_memory_block(MEMORY_BLOCK < nb_elements*size ? 
nb_elements*size : MEMORY_BLOCK); } //printf("Distinct allocations: %d Blocks: %d\n", get_distinct_allocations(memory), get_length(memory)); //printf("Requested memory of size %ld\n", sz); - void* ptr = allocate_memory(sz, memory); + void* ptr = allocate_memory(nb_elements, size, memory); pthread_mutex_unlock(&memory_lock); return ptr; #else - void* ptr = malloc(sz); + void* ptr = malloc(size*nb_elements); return ptr; #endif } diff --git a/src/memory_management.cu b/src/memory_management.cu index 8d2d52a..6d2791e 100644 --- a/src/memory_management.cu +++ b/src/memory_management.cu @@ -69,20 +69,34 @@ Memory* create_memory_block(size_t size) { } -void* allocate_memory(size_t size, Memory* mem) { +void* allocate_memory(int nb_elements, size_t size, Memory* mem) { + /* + * cursor_aligned pointe vers le premier emplacement qui pourrait être utilisé (de manière alignée). + * en effet, la mémoire nécessite d'être alignée avec CUDA: + * https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#device-memory-accesses + */ + void* aligned_cursor = mem->cursor; + #ifdef __CUDACC__ + // Cela devrait être faisable avec opérateurs binaires directement, mais on préfèrera quelque chose de lisible et vérifiable + if (((intptr_t)mem->cursor) %size != 0) { + if (size == 2 || size == 4 || size == 8 || size == 16) + aligned_cursor = (void*)(((intptr_t)mem->cursor) + (size - (((intptr_t)mem->cursor) %size))); + } + #endif // Si il y a suffisamment de mémoire disponible - if (mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start) >= size) { - void* ptr = mem->cursor; - mem->cursor = (void*)((intptr_t)mem->cursor + size); // On décale le curseur de la taille allouée + if (mem->size - ((intptr_t)aligned_cursor - (intptr_t)mem->start) >= nb_elements*size) { + void* ptr = aligned_cursor; + mem->cursor = (void*)((intptr_t)aligned_cursor + nb_elements*size); // On décale le curseur de la taille allouée mem->nb_alloc++; return ptr; } else { - //printf("Mémoire disponible: %ld. Nécessaire: %ld\n", mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start), size); + //printf("Mémoire disponible: %ld. Nécessaire: %ld\n", mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start), nb_elements*size); // Sinon on continue sur l'élément suivant de la liste if (!mem->next) { - mem->next = create_memory_block(MEMORY_BLOCK < size ? size : MEMORY_BLOCK); + //! WARNING: May cause Infinite allocations when trying to allocate more than MEMORY_BLOCK size at once that is not naturally aligned (CUDA only) + mem->next = create_memory_block(MEMORY_BLOCK < nb_elements*size ? nb_elements*size : MEMORY_BLOCK); } - return allocate_memory(size, mem->next); + return allocate_memory(nb_elements, size, mem->next); } } @@ -118,21 +132,21 @@ Memory* free_memory(void* ptr, Memory* mem) { #ifdef __CUDACC__ extern "C" #endif -void* nalloc(size_t sz) { +void* nalloc(int nb_elements, size_t size) { #if defined(__CUDACC__) || defined(TEST_MEMORY_MANAGEMENT) pthread_mutex_lock(&memory_lock); if (!memory) { // We allocate a new memory block - memory = create_memory_block(MEMORY_BLOCK < sz ? sz : MEMORY_BLOCK); + memory = create_memory_block(MEMORY_BLOCK < nb_elements*size ? 
nb_elements*size : MEMORY_BLOCK); } //printf("Distinct allocations: %d Blocks: %d\n", get_distinct_allocations(memory), get_length(memory)); //printf("Requested memory of size %ld\n", sz); - void* ptr = allocate_memory(sz, memory); + void* ptr = allocate_memory(nb_elements, size, memory); pthread_mutex_unlock(&memory_lock); return ptr; #else - void* ptr = malloc(sz); + void* ptr = malloc(size*nb_elements); return ptr; #endif } diff --git a/test/cnn_convolution.cu b/test/cnn_convolution.cu index 43eb24c..049657b 100644 --- a/test/cnn_convolution.cu +++ b/test/cnn_convolution.cu @@ -42,11 +42,11 @@ void print_matrix(float** mat, int n, int p) { float*** create_matrix(int n, int p, int q, float max_val) { - float*** matrix = (float***)nalloc(n*sizeof(float**)); + float*** matrix = (float***)nalloc(n, sizeof(float**)); for (int i=0; i < n; i++) { - matrix[i] = (float**)nalloc(sizeof(float*)*p); + matrix[i] = (float**)nalloc(p, sizeof(float*)); for (int j=0; j < p; j++) { - matrix[i][j] = (float*)nalloc(sizeof(float)*q); + matrix[i][j] = (float*)nalloc(q, sizeof(float)); } } @@ -56,11 +56,11 @@ float*** create_matrix(int n, int p, int q, float max_val) { float*** create_empty_matrix(int n, int p, int q) { - float*** matrix = (float***)nalloc(n*sizeof(float**)); + float*** matrix = (float***)nalloc(n, sizeof(float**)); for (int i=0; i < n; i++) { - matrix[i] = (float**)nalloc(sizeof(float*)*p); + matrix[i] = (float**)nalloc(p, sizeof(float*)); for (int j=0; j < p; j++) { - matrix[i][j] = (float*)nalloc(sizeof(float)*q); + matrix[i][j] = (float*)nalloc(q, sizeof(float)); for (int k=0; k < q; k++) { matrix[i][j][k] = 0.; } @@ -98,7 +98,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns) int k_size = input_dim - output_dim +1; // Génération des données aléatoires - Kernel_cnn* kernel = (Kernel_cnn*)nalloc(sizeof(Kernel_cnn)); + Kernel_cnn* kernel = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn)); kernel->k_size = k_size; kernel->rows = rows; @@ -109,8 +109,8 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns) kernel->d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f); // weights[rows][columns][k_size][k_size] - kernel->weights = (float****)nalloc(sizeof(float***)*kernel->rows); - kernel->d_weights = (float****)nalloc(sizeof(float***)*kernel->rows); + kernel->weights = (float****)nalloc(kernel->rows, sizeof(float***)); + kernel->d_weights = (float****)nalloc(kernel->rows, sizeof(float***)); for (int i=0; i < kernel->rows; i++) { kernel->weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 15.0f); kernel->d_weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f); diff --git a/test/cnn_matrix_multiplication.cu b/test/cnn_matrix_multiplication.cu index c87fe6d..e3758d9 100644 --- a/test/cnn_matrix_multiplication.cu +++ b/test/cnn_matrix_multiplication.cu @@ -38,9 +38,9 @@ void print_matrix(float** mat, int n, int p) { float** create_matrix(int n, int p) { - float** matrix = (float**)nalloc(n*sizeof(float*)); + float** matrix = (float**)nalloc(n, sizeof(float*)); for (int i=0; i < n; i++) { - matrix[i] = (float*)nalloc(sizeof(float)*p); + matrix[i] = (float*)nalloc(p, sizeof(float)); } fill_matrix_random(matrix, n, p); @@ -49,9 +49,9 @@ float** create_matrix(int n, int p) { float** create_empty_matrix(int n, int p) { - float** matrix = (float**)nalloc(n*sizeof(float*)); + float** matrix = (float**)nalloc(n, sizeof(float*)); for (int i=0; i < n; i++) { - matrix[i] = 
(float*)nalloc(p*sizeof(float)); + matrix[i] = (float*)nalloc(p, sizeof(float)); for (int j=0; j < p; j++) { matrix[i][j] = 0.; } diff --git a/test/memory_management.c b/test/memory_management.c index f7e7d2f..4c008a9 100644 --- a/test/memory_management.c +++ b/test/memory_management.c @@ -14,7 +14,7 @@ int main() { // We pollute a little bit the memory before the tests int* pointeurs[N]; for (int i=1; i < N; i++) { - pointeurs[i] = nalloc(i*sizeof(int)); + pointeurs[i] = (int*)nalloc(i, sizeof(int)); for (int j=0; j < i; j++) { pointeurs[i][j] = i; } @@ -23,14 +23,14 @@ int main() { // We test in a first place that one simple allocation works as expected mem_used = get_memory_distinct_allocations(); blocks_used = get_memory_blocks_number(); - void* ptr = nalloc(15); + void* ptr = nalloc(15, 1); if (! (get_memory_distinct_allocations() <= mem_used+1)) { - printf_error("Plus d'un élément de mémoire alloué en une seule allocation\n"); + printf_error((char*)"Plus d'un élément de mémoire alloué en une seule allocation\n"); exit(1); } gree(ptr); if (! (get_memory_blocks_number() == blocks_used)) { - printf_error("La mémoire n'a pas été libérée correctement\n"); + printf_error((char*)"La mémoire n'a pas été libérée correctement\n"); exit(1); } printf(GREEN "OK\n" RESET); @@ -40,10 +40,10 @@ int main() { printf("Allocation de deux demi-blocs\n"); // We test that we do not use too much blocks blocks_used = get_memory_blocks_number(); - void* ptr1 = nalloc(-1+MEMORY_BLOCK/2); - void* ptr2 = nalloc(-1+MEMORY_BLOCK/2); + void* ptr1 = nalloc(-1+MEMORY_BLOCK/2, 1); + void* ptr2 = nalloc(-1+MEMORY_BLOCK/2, 1); if (! (get_memory_blocks_number() <= blocks_used +1)) { - printf_error("Trop de blocs ont été alloués par rapport à la mémoire nécessaire\n"); + printf_error((char*)"Trop de blocs ont été alloués par rapport à la mémoire nécessaire\n"); exit(1); } printf(GREEN "OK\n" RESET); @@ -62,7 +62,7 @@ int main() { gree(ptr1); gree(ptr2); if (! (get_memory_distinct_allocations() == 0 && get_memory_blocks_number() == 0)) { - printf_error("La mémoire n'a pas été libérée correctement\n"); + printf_error((char*)"La mémoire n'a pas été libérée correctement\n"); exit(1); } printf(GREEN "OK\n" RESET); diff --git a/test/memory_management.cu b/test/memory_management.cu index 1b8240f..62df868 100644 --- a/test/memory_management.cu +++ b/test/memory_management.cu @@ -23,7 +23,7 @@ int main() { // We pollute a little bit the memory before the tests int* pointeurs[N]; for (int i=1; i < N; i++) { - pointeurs[i] = (int*)nalloc(i*sizeof(int)); + pointeurs[i] = (int*)nalloc(i, sizeof(int)); for (int j=0; j < i; j++) { pointeurs[i][j] = i; } @@ -32,7 +32,7 @@ int main() { // We test in a first place that one simple allocation works as expected mem_used = get_memory_distinct_allocations(); blocks_used = get_memory_blocks_number(); - void* ptr = nalloc(15); + void* ptr = nalloc(15, 1); if (! 
(get_memory_distinct_allocations() <= mem_used+1)) { printf("Plus d'un élément de mémoire alloué en une seule allocation\n"); exit(1); @@ -46,8 +46,8 @@ int main() { printf("Vérification de l'accès CUDA\n"); - /* On lance des kernels detaille 1 ce qui est itératif synchrone - * Donc un peu contraire à CUDA mais l'objectif est de débugger faiclement */ + /* On lance des kernels de taille 1 ce qui est à la fois itératif et synchrone + * Donc un peu contraire à CUDA mais l'objectif est de pouvoir débugger facilement */ dim3 gridSize(1, 1, 1); dim3 blockSize(1, 1, 1); @@ -62,8 +62,8 @@ int main() { printf("Allocation de deux demi-blocs\n"); // We test that we do not use too much blocks blocks_used = get_memory_blocks_number(); - void* ptr1 = nalloc(-1+MEMORY_BLOCK/2); - void* ptr2 = nalloc(-1+MEMORY_BLOCK/2); + void* ptr1 = nalloc(-1+MEMORY_BLOCK/2, 1); + void* ptr2 = nalloc(-1+MEMORY_BLOCK/2, 1); if (! (get_memory_blocks_number() <= blocks_used +1)) { printf("Trop de blocs ont été alloués par rapport à la mémoire nécessaire\n"); exit(1);
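
Note on the allocator API used throughout this patch: nalloc now takes an element count and an element size (calloc-style) instead of a single byte count. The two are still multiplied internally, but keeping them separate is what lets allocate_memory align its cursor on the element size when compiled with CUDA. A minimal usage sketch follows; it is an illustration only, and the include path and variable names are assumptions rather than code from the patch:

    #include "include/memory_management.h"   /* declares nalloc() and gree(); path assumed */

    void nalloc_usage_example(void) {
        int n = 128;
        /* old style: nalloc(sizeof(float)*n)  ->  new style: nalloc(n, sizeof(float)) */
        float* buf = (float*)nalloc(n, sizeof(float));
        for (int i = 0; i < n; i++) {
            buf[i] = 0.f;   /* the returned block holds at least n*sizeof(float) bytes */
        }
        gree(buf);   /* released through the project's matching free, as in the tests above */
    }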
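
The alignment itself only applies under __CUDACC__ and only for element sizes of 2, 4, 8 or 16 bytes: the cursor is rounded up to the next multiple of the element size before the block is carved out, as the CUDA programming guide section linked in the patch requires for device memory accesses. The arithmetic added to allocate_memory is equivalent to this stand-alone sketch (a restatement for clarity, not code lifted from the patch):

    #include <stddef.h>
    #include <stdint.h>

    /* Round cursor up to the next multiple of size, with size assumed to be
     * 2, 4, 8 or 16. An already-aligned cursor is returned unchanged, mirroring
     * the branch added in allocate_memory(). */
    static void* align_cursor(void* cursor, size_t size) {
        intptr_t c = (intptr_t)cursor;
        intptr_t rest = c % (intptr_t)size;
        return rest == 0 ? cursor : (void*)(c + ((intptr_t)size - rest));
    }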