Mirror of https://github.com/augustin64/projet-tipe

Commit b89c651174 (parent: a049f578af)

Align memory addresses when allocating for CUDA
https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#device-memory-accesses
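The commit renames nalloc(size) to a calloc-style nalloc(nb_elements, size) across the code base, so the allocator knows the element size and can align each allocation on it. Below is a minimal sketch of the alignment rule the commit introduces in allocate_memory(); the names mirror the diff, and the block-list handling is omitted:

    #include <stdint.h>
    #include <stddef.h>

    /* Round the cursor up to the next multiple of the element size, but only
     * for the power-of-two sizes (2, 4, 8, 16) that CUDA expects to be
     * naturally aligned. Other sizes are left untouched, as in the diff. */
    static void* align_cursor(void* cursor, size_t size) {
        intptr_t addr = (intptr_t)cursor;
        if (addr % size != 0) {
            if (size == 2 || size == 4 || size == 8 || size == 16) {
                addr += size - (addr % size); // advance to the next aligned address
            }
        }
        return (void*)addr;
    }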
@@ -12,19 +12,19 @@ Network* create_network(int max_size, float learning_rate, int dropout, int init
     if (dropout < 0 || dropout > 100) {
         printf("Erreur, la probabilité de dropout n'est pas respecté, elle doit être comprise entre 0 et 100\n");
     }
-    Network* network = (Network*)nalloc(sizeof(Network));
+    Network* network = (Network*)nalloc(1, sizeof(Network));
     network->learning_rate = learning_rate;
     network->max_size = max_size;
     network->dropout = dropout;
     network->initialisation = initialisation;
     network->size = 1;
-    network->input = (float****)nalloc(sizeof(float***)*max_size);
-    network->input_z = (float****)nalloc(sizeof(float***)*max_size);
-    network->kernel = (Kernel**)nalloc(sizeof(Kernel*)*(max_size-1));
-    network->width = (int*)nalloc(sizeof(int*)*max_size);
-    network->depth = (int*)nalloc(sizeof(int*)*max_size);
+    network->input = (float****)nalloc(max_size, sizeof(float***));
+    network->input_z = (float****)nalloc(max_size, sizeof(float***));
+    network->kernel = (Kernel**)nalloc(max_size-1, sizeof(Kernel*));
+    network->width = (int*)nalloc(max_size, sizeof(int*));
+    network->depth = (int*)nalloc(max_size, sizeof(int*));
     for (int i=0; i < max_size-1; i++) {
-        network->kernel[i] = (Kernel*)nalloc(sizeof(Kernel));
+        network->kernel[i] = (Kernel*)nalloc(1, sizeof(Kernel));
     }
     network->kernel[0]->linearisation = 0;
     network->width[0] = input_dim;
@@ -58,11 +58,11 @@ Network* create_simple_one(float learning_rate, int dropout, int activation, int
 }
 
 void create_a_cube_input_layer(Network* network, int pos, int depth, int dim) {
-    network->input[pos] = (float***)nalloc(sizeof(float**)*depth);
+    network->input[pos] = (float***)nalloc(depth, sizeof(float**));
     for (int i=0; i < depth; i++) {
-        network->input[pos][i] = (float**)nalloc(sizeof(float*)*dim);
+        network->input[pos][i] = (float**)nalloc(dim, sizeof(float*));
         for (int j=0; j < dim; j++) {
-            network->input[pos][i][j] = (float*)nalloc(sizeof(float)*dim);
+            network->input[pos][i][j] = (float*)nalloc(dim, sizeof(float));
         }
     }
     network->width[pos] = dim;
@@ -70,11 +70,11 @@ void create_a_cube_input_layer(Network* network, int pos, int depth, int dim) {
 }
 
 void create_a_cube_input_z_layer(Network* network, int pos, int depth, int dim) {
-    network->input_z[pos] = (float***)nalloc(sizeof(float**)*depth);
+    network->input_z[pos] = (float***)nalloc(depth, sizeof(float**));
     for (int i=0; i < depth; i++) {
-        network->input_z[pos][i] = (float**)nalloc(sizeof(float*)*dim);
+        network->input_z[pos][i] = (float**)nalloc(dim, sizeof(float*));
         for (int j=0; j < dim; j++) {
-            network->input_z[pos][i][j] = (float*)nalloc(sizeof(float)*dim);
+            network->input_z[pos][i][j] = (float*)nalloc(dim, sizeof(float));
         }
     }
     network->width[pos] = dim;
@@ -82,17 +82,17 @@ void create_a_cube_input_z_layer(Network* network, int pos, int depth, int dim)
 }
 
 void create_a_line_input_layer(Network* network, int pos, int dim) {
-    network->input[pos] = (float***)nalloc(sizeof(float**));
-    network->input[pos][0] = (float**)nalloc(sizeof(float*));
-    network->input[pos][0][0] = (float*)nalloc(sizeof(float)*dim);
+    network->input[pos] = (float***)nalloc(1, sizeof(float**));
+    network->input[pos][0] = (float**)nalloc(1, sizeof(float*));
+    network->input[pos][0][0] = (float*)nalloc(dim, sizeof(float));
     network->width[pos] = dim;
     network->depth[pos] = 1;
 }
 
 void create_a_line_input_z_layer(Network* network, int pos, int dim) {
-    network->input_z[pos] = (float***)nalloc(sizeof(float**));
-    network->input_z[pos][0] = (float**)nalloc(sizeof(float*));
-    network->input_z[pos][0][0] = (float*)nalloc(sizeof(float)*dim);
+    network->input_z[pos] = (float***)nalloc(1, sizeof(float**));
+    network->input_z[pos][0] = (float**)nalloc(1, sizeof(float*));
+    network->input_z[pos][0][0] = (float*)nalloc(dim, sizeof(float));
     network->width[pos] = dim;
     network->depth[pos] = 1;
 }
@@ -157,37 +157,37 @@ void add_convolution(Network* network, int depth_output, int dim_output, int act
     network->kernel[k_pos]->activation = activation;
     network->kernel[k_pos]->linearisation = 0;
     network->kernel[k_pos]->pooling = 0;
-    network->kernel[k_pos]->cnn = (Kernel_cnn*)nalloc(sizeof(Kernel_cnn));
+    network->kernel[k_pos]->cnn = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn));
     Kernel_cnn* cnn = network->kernel[k_pos]->cnn;
 
     cnn->k_size = kernel_size;
     cnn->rows = depth_input;
     cnn->columns = depth_output;
-    cnn->weights = (float****)nalloc(sizeof(float***)*depth_input);
-    cnn->d_weights = (float****)nalloc(sizeof(float***)*depth_input);
+    cnn->weights = (float****)nalloc(depth_input, sizeof(float***));
+    cnn->d_weights = (float****)nalloc(depth_input, sizeof(float***));
     for (int i=0; i < depth_input; i++) {
-        cnn->weights[i] = (float***)nalloc(sizeof(float**)*depth_output);
-        cnn->d_weights[i] = (float***)nalloc(sizeof(float**)*depth_output);
+        cnn->weights[i] = (float***)nalloc(depth_output, sizeof(float**));
+        cnn->d_weights[i] = (float***)nalloc(depth_output, sizeof(float**));
         for (int j=0; j < depth_output; j++) {
-            cnn->weights[i][j] = (float**)nalloc(sizeof(float*)*kernel_size);
-            cnn->d_weights[i][j] = (float**)nalloc(sizeof(float*)*kernel_size);
+            cnn->weights[i][j] = (float**)nalloc(kernel_size, sizeof(float*));
+            cnn->d_weights[i][j] = (float**)nalloc(kernel_size, sizeof(float*));
             for (int k=0; k < kernel_size; k++) {
-                cnn->weights[i][j][k] = (float*)nalloc(sizeof(float)*kernel_size);
-                cnn->d_weights[i][j][k] = (float*)nalloc(sizeof(float)*kernel_size);
+                cnn->weights[i][j][k] = (float*)nalloc(kernel_size, sizeof(float));
+                cnn->d_weights[i][j][k] = (float*)nalloc(kernel_size, sizeof(float));
                 for (int l=0; l < kernel_size; l++) {
                     cnn->d_weights[i][j][k][l] = 0.;
                 }
            }
        }
    }
-    cnn->bias = (float***)nalloc(sizeof(float**)*depth_output);
-    cnn->d_bias = (float***)nalloc(sizeof(float**)*depth_output);
+    cnn->bias = (float***)nalloc(depth_output, sizeof(float**));
+    cnn->d_bias = (float***)nalloc(depth_output, sizeof(float**));
     for (int i=0; i < depth_output; i++) {
-        cnn->bias[i] = (float**)nalloc(sizeof(float*)*bias_size);
-        cnn->d_bias[i] = (float**)nalloc(sizeof(float*)*bias_size);
+        cnn->bias[i] = (float**)nalloc(bias_size, sizeof(float*));
+        cnn->d_bias[i] = (float**)nalloc(bias_size, sizeof(float*));
         for (int j=0; j < bias_size; j++) {
-            cnn->bias[i][j] = (float*)nalloc(sizeof(float)*bias_size);
-            cnn->d_bias[i][j] = (float*)nalloc(sizeof(float)*bias_size);
+            cnn->bias[i][j] = (float*)nalloc(bias_size, sizeof(float));
+            cnn->d_bias[i][j] = (float*)nalloc(bias_size, sizeof(float));
             for (int k=0; k < bias_size; k++) {
                 cnn->d_bias[i][j][k] = 0.;
             }
@@ -211,24 +211,24 @@ void add_dense(Network* network, int size_output, int activation) {
         return;
     }
     network->kernel[k_pos]->cnn = NULL;
-    network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(sizeof(Kernel_nn));
+    network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn));
     Kernel_nn* nn = network->kernel[k_pos]->nn;
     network->kernel[k_pos]->activation = activation;
     network->kernel[k_pos]->linearisation = 0;
     network->kernel[k_pos]->pooling = 0;
     nn->size_input = size_input;
     nn->size_output = size_output;
-    nn->bias = (float*)nalloc(sizeof(float)*size_output);
-    nn->d_bias = (float*)nalloc(sizeof(float)*size_output);
+    nn->bias = (float*)nalloc(size_output, sizeof(float));
+    nn->d_bias = (float*)nalloc(size_output, sizeof(float));
     for (int i=0; i < size_output; i++) {
         nn->d_bias[i] = 0.;
     }
 
-    nn->weights = (float**)nalloc(sizeof(float*)*size_input);
-    nn->d_weights = (float**)nalloc(sizeof(float*)*size_input);
+    nn->weights = (float**)nalloc(size_input, sizeof(float*));
+    nn->d_weights = (float**)nalloc(size_input, sizeof(float*));
     for (int i=0; i < size_input; i++) {
-        nn->weights[i] = (float*)nalloc(sizeof(float)*size_output);
-        nn->d_weights[i] = (float*)nalloc(sizeof(float)*size_output);
+        nn->weights[i] = (float*)nalloc(size_output, sizeof(float));
+        nn->d_weights[i] = (float*)nalloc(size_output, sizeof(float));
         for (int j=0; j < size_output; j++) {
             nn->d_weights[i][j] = 0.;
         }
@@ -252,7 +252,7 @@ void add_dense_linearisation(Network* network, int size_output, int activation)
         return;
     }
     network->kernel[k_pos]->cnn = NULL;
-    network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(sizeof(Kernel_nn));
+    network->kernel[k_pos]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn));
     Kernel_nn* nn = network->kernel[k_pos]->nn;
     network->kernel[k_pos]->activation = activation;
     network->kernel[k_pos]->linearisation = 1;
@@ -260,16 +260,16 @@ void add_dense_linearisation(Network* network, int size_output, int activation)
     nn->size_input = size_input;
     nn->size_output = size_output;
 
-    nn->bias = (float*)nalloc(sizeof(float)*size_output);
-    nn->d_bias = (float*)nalloc(sizeof(float)*size_output);
+    nn->bias = (float*)nalloc(size_output, sizeof(float));
+    nn->d_bias = (float*)nalloc(size_output, sizeof(float));
     for (int i=0; i < size_output; i++) {
         nn->d_bias[i] = 0.;
     }
-    nn->weights = (float**)nalloc(sizeof(float*)*size_input);
-    nn->d_weights = (float**)nalloc(sizeof(float*)*size_input);
+    nn->weights = (float**)nalloc(size_input, sizeof(float*));
+    nn->d_weights = (float**)nalloc(size_input, sizeof(float*));
     for (int i=0; i < size_input; i++) {
-        nn->weights[i] = (float*)nalloc(sizeof(float)*size_output);
-        nn->d_weights[i] = (float*)nalloc(sizeof(float)*size_output);
+        nn->weights[i] = (float*)nalloc(size_output, sizeof(float));
+        nn->d_weights[i] = (float*)nalloc(size_output, sizeof(float));
         for (int j=0; j < size_output; j++) {
             nn->d_weights[i][j] = 0.;
         }
@@ -234,7 +234,7 @@ void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input,
 * Dense linearised
 */
 #ifdef __CUDACC__
-__global__ void make_dense_linearised_kernel(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) {
+__global__ void make_dense_linearised_kernel(float** weights, float*** input, float* output, int depth_input, int dim_input, int size_output) {
     // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
     int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output
 
@@ -246,7 +246,7 @@ __global__ void make_dense_linearised_kernel(Kernel_nn* kernel, float*** input,
     for (int i=0; i < depth_input; i++) {
         for (int j=0; j < dim_input; j++) {
             for (int k=0; k < dim_input; k++) {
-                f += input[i][j][k]*kernel->weights[k + j*dim_input + i*depth_input][idx];
+                f += input[i][j][k]*weights[k + j*dim_input + i*depth_input][idx];
             }
         }
     }
@@ -258,7 +258,7 @@ void make_dense_linearised_device(Kernel_nn* kernel, float*** input, float* outp
     dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1);
     dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z);
 
-    make_dense_linearised_kernel<<<gridSize, blockSize>>>(kernel, input, output, depth_input, dim_input, size_output);
+    make_dense_linearised_kernel<<<gridSize, blockSize>>>(kernel->weights, input, output, depth_input, dim_input, size_output);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }

@@ -234,7 +234,7 @@ void make_dense(Kernel_nn* kernel, float* input, float* output, int size_input,
 * Dense linearised
 */
 #ifdef __CUDACC__
-__global__ void make_dense_linearised_kernel(Kernel_nn* kernel, float*** input, float* output, int depth_input, int dim_input, int size_output) {
+__global__ void make_dense_linearised_kernel(float** weights, float*** input, float* output, int depth_input, int dim_input, int size_output) {
     // Équivalents respectifs de i, j et k dans la boucle effectuée par le cpu
     int idx = threadIdx.x + blockDim.x*blockIdx.x; // < size_output
 
@@ -246,7 +246,7 @@ __global__ void make_dense_linearised_kernel(Kernel_nn* kernel, float*** input,
     for (int i=0; i < depth_input; i++) {
         for (int j=0; j < dim_input; j++) {
             for (int k=0; k < dim_input; k++) {
-                f += input[i][j][k]*kernel->weights[k + j*dim_input + i*depth_input][idx];
+                f += input[i][j][k]*weights[k + j*dim_input + i*depth_input][idx];
             }
         }
     }
@@ -258,7 +258,7 @@ void make_dense_linearised_device(Kernel_nn* kernel, float*** input, float* outp
     dim3 gridSize(i_div_up(size_output, BLOCKSIZE_x*BLOCKSIZE_y), 1, 1);
     dim3 blockSize(BLOCKSIZE_x*BLOCKSIZE_y, 1, BLOCKSIZE_z);
 
-    make_dense_linearised_kernel<<<gridSize, blockSize>>>(kernel, input, output, depth_input, dim_input, size_output);
+    make_dense_linearised_kernel<<<gridSize, blockSize>>>(kernel->weights, input, output, depth_input, dim_input, size_output);
     gpuErrchk( cudaPeekAtLastError() );
     gpuErrchk( cudaDeviceSynchronize() );
 }
@@ -141,7 +141,7 @@ void write_couche(Network* network, int indice_couche, int type_couche, FILE* pt
 
 Network* read_network(char* filename) {
     FILE *ptr;
-    Network* network = (Network*)nalloc(sizeof(Network));
+    Network* network = (Network*)nalloc(1, sizeof(Network));
 
     ptr = fopen(filename, "rb");
 
@@ -167,8 +167,8 @@ Network* read_network(char* filename) {
     network->dropout = dropout;
 
     // Lecture de la taille de l'entrée des différentes matrices
-    network->width = (int*)nalloc(sizeof(int)*size);
-    network->depth = (int*)nalloc(sizeof(int)*size);
+    network->width = (int*)nalloc(size, sizeof(int));
+    network->depth = (int*)nalloc(size, sizeof(int));
 
     for (int i=0; i < (int)size; i++) {
         fread(&tmp, sizeof(uint32_t), 1, ptr);
@@ -186,19 +186,19 @@ Network* read_network(char* filename) {
     }
 
     // Lecture de chaque couche
-    network->kernel = (Kernel**)nalloc(sizeof(Kernel*)*(size-1));
+    network->kernel = (Kernel**)nalloc(size-1, sizeof(Kernel*));
 
     for (int i=0; i < (int)size-1; i++) {
         network->kernel[i] = read_kernel(type_couche[i], network->width[i+1], ptr);
     }
 
-    network->input = (float****)nalloc(sizeof(float***)*size);
+    network->input = (float****)nalloc(size, sizeof(float***));
     for (int i=0; i < (int)size; i++) { // input[size][couche->depth][couche->dim][couche->dim]
-        network->input[i] = (float***)nalloc(sizeof(float**)*network->depth[i]);
+        network->input[i] = (float***)nalloc(network->depth[i], sizeof(float**));
         for (int j=0; j < network->depth[i]; j++) {
-            network->input[i][j] = (float**)nalloc(sizeof(float*)*network->width[i]);
+            network->input[i][j] = (float**)nalloc(network->width[i], sizeof(float*));
             for (int k=0; k < network->width[i]; k++) {
-                network->input[i][j][k] = (float*)nalloc(sizeof(float)*network->width[i]);
+                network->input[i][j][k] = (float*)nalloc(network->width[i], sizeof(float));
                 for (int l=0; l < network->width[i]; l++) {
                     network->input[i][j][k][l] = 0.;
                 }
@@ -206,13 +206,13 @@ Network* read_network(char* filename) {
         }
     }
 
-    network->input_z = (float****)nalloc(sizeof(float***)*size);
+    network->input_z = (float****)nalloc(size, sizeof(float***));
     for (int i=0; i < (int)size; i++) { // input[size][couche->depth][couche->dim][couche->dim]
-        network->input_z[i] = (float***)nalloc(sizeof(float**)*network->depth[i]);
+        network->input_z[i] = (float***)nalloc(network->depth[i], sizeof(float**));
         for (int j=0; j < network->depth[i]; j++) {
-            network->input_z[i][j] = (float**)nalloc(sizeof(float*)*network->width[i]);
+            network->input_z[i][j] = (float**)nalloc(network->width[i], sizeof(float*));
             for (int k=0; k < network->width[i]; k++) {
-                network->input_z[i][j][k] = (float*)nalloc(sizeof(float)*network->width[i]);
+                network->input_z[i][j][k] = (float*)nalloc(network->width[i], sizeof(float));
                 for (int l=0; l < network->width[i]; l++) {
                     network->input_z[i][j][k][l] = 0.;
                 }
@@ -225,10 +225,10 @@ Network* read_network(char* filename) {
 }
 
 Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) {
-    Kernel* kernel = (Kernel*)nalloc(sizeof(Kernel));
+    Kernel* kernel = (Kernel*)nalloc(1, sizeof(Kernel));
     if (type_couche == 0) { // Cas du CNN
         // Lecture du "Pré-corps"
-        kernel->cnn = (Kernel_cnn*)nalloc(sizeof(Kernel_cnn));
+        kernel->cnn = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn));
         kernel->nn = NULL;
         uint32_t buffer[5];
         fread(&buffer, sizeof(buffer), 1, ptr);
@@ -243,14 +243,14 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) {
         Kernel_cnn* cnn = kernel->cnn;
         float tmp;
 
-        cnn->bias = (float***)nalloc(sizeof(float**)*cnn->columns);
-        cnn->d_bias = (float***)nalloc(sizeof(float**)*cnn->columns);
+        cnn->bias = (float***)nalloc(cnn->columns, sizeof(float**));
+        cnn->d_bias = (float***)nalloc(cnn->columns, sizeof(float**));
         for (int i=0; i < cnn->columns; i++) {
-            cnn->bias[i] = (float**)nalloc(sizeof(float*)*output_dim);
-            cnn->d_bias[i] = (float**)nalloc(sizeof(float*)*output_dim);
+            cnn->bias[i] = (float**)nalloc(output_dim, sizeof(float*));
+            cnn->d_bias[i] = (float**)nalloc(output_dim, sizeof(float*));
             for (int j=0; j < output_dim; j++) {
-                cnn->bias[i][j] = (float*)nalloc(sizeof(float)*output_dim);
-                cnn->d_bias[i][j] = (float*)nalloc(sizeof(float)*output_dim);
+                cnn->bias[i][j] = (float*)nalloc(output_dim, sizeof(float));
+                cnn->d_bias[i][j] = (float*)nalloc(output_dim, sizeof(float));
                 for (int k=0; k < output_dim; k++) {
                     fread(&tmp, sizeof(tmp), 1, ptr);
                     cnn->bias[i][j][k] = tmp;
@@ -259,17 +259,17 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) {
             }
         }
 
-        cnn->weights = (float****)nalloc(sizeof(float***)*cnn->rows);
-        cnn->d_weights = (float****)nalloc(sizeof(float***)*cnn->rows);
+        cnn->weights = (float****)nalloc(cnn->rows, sizeof(float***));
+        cnn->d_weights = (float****)nalloc(cnn->rows, sizeof(float***));
         for (int i=0; i < cnn->rows; i++) {
-            cnn->weights[i] = (float***)nalloc(sizeof(float**)*cnn->columns);
-            cnn->d_weights[i] = (float***)nalloc(sizeof(float**)*cnn->columns);
+            cnn->weights[i] = (float***)nalloc(cnn->columns, sizeof(float**));
+            cnn->d_weights[i] = (float***)nalloc(cnn->columns, sizeof(float**));
             for (int j=0; j < cnn->columns; j++) {
-                cnn->weights[i][j] = (float**)nalloc(sizeof(float*)*cnn->k_size);
-                cnn->d_weights[i][j] = (float**)nalloc(sizeof(float*)*cnn->k_size);
+                cnn->weights[i][j] = (float**)nalloc(cnn->k_size, sizeof(float*));
+                cnn->d_weights[i][j] = (float**)nalloc(cnn->k_size, sizeof(float*));
                 for (int k=0; k < cnn->k_size; k++) {
-                    cnn->weights[i][j][k] = (float*)nalloc(sizeof(float)*cnn->k_size);
-                    cnn->d_weights[i][j][k] = (float*)nalloc(sizeof(float)*cnn->k_size);
+                    cnn->weights[i][j][k] = (float*)nalloc(cnn->k_size, sizeof(float));
+                    cnn->d_weights[i][j][k] = (float*)nalloc(cnn->k_size, sizeof(float));
                     for (int l=0; l < cnn->k_size; l++) {
                         fread(&tmp, sizeof(tmp), 1, ptr);
                         cnn->weights[i][j][k][l] = tmp;
@@ -280,7 +280,7 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) {
         }
     } else if (type_couche == 1) { // Cas du NN
         // Lecture du "Pré-corps"
-        kernel->nn = (Kernel_nn*)nalloc(sizeof(Kernel_nn));
+        kernel->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn));
         kernel->cnn = NULL;
         uint32_t buffer[4];
         fread(&buffer, sizeof(buffer), 1, ptr);
@@ -294,19 +294,19 @@ Kernel* read_kernel(int type_couche, int output_dim, FILE* ptr) {
         Kernel_nn* nn = kernel->nn;
         float tmp;
 
-        nn->bias = (float*)nalloc(sizeof(float)*nn->size_output);
-        nn->d_bias = (float*)nalloc(sizeof(float)*nn->size_output);
+        nn->bias = (float*)nalloc(nn->size_output, sizeof(float));
+        nn->d_bias = (float*)nalloc(nn->size_output, sizeof(float));
         for (int i=0; i < nn->size_output; i++) {
             fread(&tmp, sizeof(tmp), 1, ptr);
             nn->bias[i] = tmp;
             nn->d_bias[i] = 0.;
         }
 
-        nn->weights = (float**)nalloc(sizeof(float*)*nn->size_input);
-        nn->d_weights = (float**)nalloc(sizeof(float*)*nn->size_input);
+        nn->weights = (float**)nalloc(nn->size_input, sizeof(float*));
+        nn->d_weights = (float**)nalloc(nn->size_input, sizeof(float*));
         for (int i=0; i < nn->size_input; i++) {
-            nn->weights[i] = (float*)nalloc(sizeof(float)*nn->size_output);
-            nn->d_weights[i] = (float*)nalloc(sizeof(float)*nn->size_output);
+            nn->weights[i] = (float*)nalloc(nn->size_output, sizeof(float));
+            nn->d_weights[i] = (float*)nalloc(nn->size_output, sizeof(float));
             for (int j=0; j < nn->size_output; j++) {
                 fread(&tmp, sizeof(tmp), 1, ptr);
                 nn->weights[i][j] = tmp;
@@ -96,7 +96,7 @@ bool equals_networks(Network* network1, Network* network2) {
 
 
 Network* copy_network(Network* network) {
-    Network* network_cp = (Network*)nalloc(sizeof(Network));
+    Network* network_cp = (Network*)nalloc(1, sizeof(Network));
     // Paramètre du réseau
     int size = network->size;
     // Paramètres des couches NN
@@ -114,17 +114,17 @@ Network* copy_network(Network* network) {
     copyVar(max_size);
     copyVar(size);
 
-    network_cp->width = (int*)nalloc(sizeof(int)*size);
-    network_cp->depth = (int*)nalloc(sizeof(int)*size);
+    network_cp->width = (int*)nalloc(size, sizeof(int));
+    network_cp->depth = (int*)nalloc(size, sizeof(int));
 
     for (int i=0; i < size; i++) {
         copyVar(width[i]);
         copyVar(depth[i]);
     }
 
-    network_cp->kernel = (Kernel**)nalloc(sizeof(Kernel*)*(size-1));
+    network_cp->kernel = (Kernel**)nalloc(size-1, sizeof(Kernel*));
     for (int i=0; i < size-1; i++) {
-        network_cp->kernel[i] = (Kernel*)nalloc(sizeof(Kernel));
+        network_cp->kernel[i] = (Kernel*)nalloc(1, sizeof(Kernel));
         if (!network->kernel[i]->nn && !network->kernel[i]->cnn) { // Cas de la couche de linéarisation
             copyVar(kernel[i]->pooling);
             copyVar(kernel[i]->activation);
@@ -141,23 +141,23 @@ Network* copy_network(Network* network) {
             size_output = network->kernel[i]->nn->size_output;
 
             network_cp->kernel[i]->cnn = NULL;
-            network_cp->kernel[i]->nn = (Kernel_nn*)nalloc(sizeof(Kernel_nn));
+            network_cp->kernel[i]->nn = (Kernel_nn*)nalloc(1, sizeof(Kernel_nn));
 
             copyVar(kernel[i]->nn->size_input);
             copyVar(kernel[i]->nn->size_output);
 
-            network_cp->kernel[i]->nn->bias = (float*)nalloc(sizeof(float)*size_output);
-            network_cp->kernel[i]->nn->d_bias = (float*)nalloc(sizeof(float)*size_output);
+            network_cp->kernel[i]->nn->bias = (float*)nalloc(size_output, sizeof(float));
+            network_cp->kernel[i]->nn->d_bias = (float*)nalloc(size_output, sizeof(float));
             for (int j=0; j < size_output; j++) {
                 copyVar(kernel[i]->nn->bias[j]);
                 network_cp->kernel[i]->nn->d_bias[j] = 0.;
             }
 
-            network_cp->kernel[i]->nn->weights = (float**)nalloc(sizeof(float*)*size_input);
-            network_cp->kernel[i]->nn->d_weights = (float**)nalloc(sizeof(float*)*size_input);
+            network_cp->kernel[i]->nn->weights = (float**)nalloc(size_input, sizeof(float*));
+            network_cp->kernel[i]->nn->d_weights = (float**)nalloc(size_input, sizeof(float*));
             for (int j=0; j < size_input; j++) {
-                network_cp->kernel[i]->nn->weights[j] = (float*)nalloc(sizeof(float)*size_output);
-                network_cp->kernel[i]->nn->d_weights[j] = (float*)nalloc(sizeof(float)*size_output);
+                network_cp->kernel[i]->nn->weights[j] = (float*)nalloc(size_output, sizeof(float));
+                network_cp->kernel[i]->nn->d_weights[j] = (float*)nalloc(size_output, sizeof(float));
                 for (int k=0; k < size_output; k++) {
                     copyVar(kernel[i]->nn->weights[j][k]);
                     network_cp->kernel[i]->nn->d_weights[j][k] = 0.;
@@ -176,20 +176,20 @@ Network* copy_network(Network* network) {
 
 
             network_cp->kernel[i]->nn = NULL;
-            network_cp->kernel[i]->cnn = (Kernel_cnn*)nalloc(sizeof(Kernel_cnn));
+            network_cp->kernel[i]->cnn = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn));
 
             copyVar(kernel[i]->cnn->rows);
             copyVar(kernel[i]->cnn->k_size);
             copyVar(kernel[i]->cnn->columns);
 
-            network_cp->kernel[i]->cnn->bias = (float***)nalloc(sizeof(float**)*columns);
-            network_cp->kernel[i]->cnn->d_bias = (float***)nalloc(sizeof(float**)*columns);
+            network_cp->kernel[i]->cnn->bias = (float***)nalloc(columns, sizeof(float**));
+            network_cp->kernel[i]->cnn->d_bias = (float***)nalloc(columns, sizeof(float**));
             for (int j=0; j < columns; j++) {
-                network_cp->kernel[i]->cnn->bias[j] = (float**)nalloc(sizeof(float*)*output_dim);
-                network_cp->kernel[i]->cnn->d_bias[j] = (float**)nalloc(sizeof(float*)*output_dim);
+                network_cp->kernel[i]->cnn->bias[j] = (float**)nalloc(output_dim, sizeof(float*));
+                network_cp->kernel[i]->cnn->d_bias[j] = (float**)nalloc(output_dim, sizeof(float*));
                 for (int k=0; k < output_dim; k++) {
-                    network_cp->kernel[i]->cnn->bias[j][k] = (float*)nalloc(sizeof(float)*output_dim);
-                    network_cp->kernel[i]->cnn->d_bias[j][k] = (float*)nalloc(sizeof(float)*output_dim);
+                    network_cp->kernel[i]->cnn->bias[j][k] = (float*)nalloc(output_dim, sizeof(float));
+                    network_cp->kernel[i]->cnn->d_bias[j][k] = (float*)nalloc(output_dim, sizeof(float));
                     for (int l=0; l < output_dim; l++) {
                         copyVar(kernel[i]->cnn->bias[j][k][l]);
                         network_cp->kernel[i]->cnn->d_bias[j][k][l] = 0.;
@@ -197,17 +197,17 @@ Network* copy_network(Network* network) {
                 }
             }
 
-            network_cp->kernel[i]->cnn->weights = (float****)nalloc(sizeof(float***)*rows);
-            network_cp->kernel[i]->cnn->d_weights = (float****)nalloc(sizeof(float***)*rows);
+            network_cp->kernel[i]->cnn->weights = (float****)nalloc(rows, sizeof(float***));
+            network_cp->kernel[i]->cnn->d_weights = (float****)nalloc(rows, sizeof(float***));
             for (int j=0; j < rows; j++) {
-                network_cp->kernel[i]->cnn->weights[j] = (float***)nalloc(sizeof(float**)*columns);
-                network_cp->kernel[i]->cnn->d_weights[j] = (float***)nalloc(sizeof(float**)*columns);
+                network_cp->kernel[i]->cnn->weights[j] = (float***)nalloc(columns, sizeof(float**));
+                network_cp->kernel[i]->cnn->d_weights[j] = (float***)nalloc(columns, sizeof(float**));
                 for (int k=0; k < columns; k++) {
-                    network_cp->kernel[i]->cnn->weights[j][k] = (float**)nalloc(sizeof(float*)*k_size);
-                    network_cp->kernel[i]->cnn->d_weights[j][k] = (float**)nalloc(sizeof(float*)*k_size);
+                    network_cp->kernel[i]->cnn->weights[j][k] = (float**)nalloc(k_size, sizeof(float*));
+                    network_cp->kernel[i]->cnn->d_weights[j][k] = (float**)nalloc(k_size, sizeof(float*));
                     for (int l=0; l < k_size; l++) {
-                        network_cp->kernel[i]->cnn->weights[j][k][l] = (float*)nalloc(sizeof(float)*k_size);
-                        network_cp->kernel[i]->cnn->d_weights[j][k][l] = (float*)nalloc(sizeof(float)*k_size);
+                        network_cp->kernel[i]->cnn->weights[j][k][l] = (float*)nalloc(k_size, sizeof(float));
+                        network_cp->kernel[i]->cnn->d_weights[j][k][l] = (float*)nalloc(k_size, sizeof(float));
                         for (int m=0; m < k_size; m++) {
                             copyVar(kernel[i]->cnn->weights[j][k][l][m]);
                             network_cp->kernel[i]->cnn->d_weights[j][k][l][m] = 0.;
@@ -218,13 +218,13 @@ Network* copy_network(Network* network) {
         }
     }
 
-    network_cp->input = (float****)nalloc(sizeof(float***)*size);
+    network_cp->input = (float****)nalloc(size, sizeof(float***));
     for (int i=0; i < size; i++) { // input[size][couche->depth][couche->dim][couche->dim]
-        network_cp->input[i] = (float***)nalloc(sizeof(float**)*network->depth[i]);
+        network_cp->input[i] = (float***)nalloc(network->depth[i], sizeof(float**));
         for (int j=0; j < network->depth[i]; j++) {
-            network_cp->input[i][j] = (float**)nalloc(sizeof(float*)*network->width[i]);
+            network_cp->input[i][j] = (float**)nalloc(network->width[i], sizeof(float*));
             for (int k=0; k < network->width[i]; k++) {
-                network_cp->input[i][j][k] = (float*)nalloc(sizeof(float)*network->width[i]);
+                network_cp->input[i][j][k] = (float*)nalloc(network->width[i], sizeof(float));
                 for (int l=0; l < network->width[i]; l++) {
                     network_cp->input[i][j][k][l] = 0.;
                 }
@@ -232,13 +232,13 @@ Network* copy_network(Network* network) {
         }
     }
 
-    network_cp->input_z = (float****)nalloc(sizeof(float***)*size);
+    network_cp->input_z = (float****)nalloc(size, sizeof(float***));
     for (int i=0; i < size; i++) { // input_z[size][couche->depth][couche->dim][couche->dim]
-        network_cp->input_z[i] = (float***)nalloc(sizeof(float**)*network->depth[i]);
+        network_cp->input_z[i] = (float***)nalloc(network->depth[i], sizeof(float**));
        for (int j=0; j < network->depth[i]; j++) {
-            network_cp->input_z[i][j] = (float**)nalloc(sizeof(float*)*network->width[i]);
+            network_cp->input_z[i][j] = (float**)nalloc(network->width[i], sizeof(float*));
             for (int k=0; k < network->width[i]; k++) {
-                network_cp->input_z[i][j][k] = (float*)nalloc(sizeof(float)*network->width[i]);
+                network_cp->input_z[i][j][k] = (float*)nalloc(network->width[i], sizeof(float));
                 for (int l=0; l < network->width[i]; l++) {
                     network_cp->input_z[i][j][k][l] = 0.;
                 }
@@ -67,7 +67,7 @@ Memory* create_memory_block(size_t size);
 /*
 * Allouer un élément de taille size dans mem
 */
-void* allocate_memory(size_t size, Memory* mem);
+void* allocate_memory(int nb_elements, size_t size, Memory* mem);
 
 /*
 * Essayer de libérer le pointeur représenté par ptr dans mem
@@ -80,7 +80,7 @@ extern "C"
 /*
 * Alloue de la mémoire partagée CUDA si CUDA est activé
 */
-void* nalloc(size_t sz);
+void* nalloc(int nb_elements, size_t size);
 
 #ifdef __CUDACC__
 extern "C"
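Call sites change mechanically from a single byte count to a count/size pair. A hypothetical call site, assuming only the new declaration above (example() and n are illustrative, not from the diff):

    #include <stddef.h>

    void* nalloc(int nb_elements, size_t size); // new signature from the header above

    void example(int n) {
        // old form: (float*)nalloc(sizeof(float)*n)
        // new form: element count and element size passed separately, so
        // allocate_memory() can align the cursor on sizeof(float)
        float* buffer = (float*)nalloc(n, sizeof(float));
        (void)buffer; // released with gree() elsewhere in the code base
    }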
@@ -69,20 +69,33 @@ Memory* create_memory_block(size_t size) {
 }
 
 
-void* allocate_memory(size_t size, Memory* mem) {
+void* allocate_memory(int nb_elements, size_t size, Memory* mem) {
+    /*
+    * cursor_aligned pointe vers le premier emplacement qui pourrait être utilisé (de manière alignée).
+    * en effet, la mémoire nécessite d'être alignée avec CUDA:
+    * https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#device-memory-accesses
+    */
+    void* aligned_cursor = mem->cursor;
+    #ifdef __CUDACC__
+    // Cela devrait être faisable avec opérateurs binaires directement, mais on préfèrera quelque chose de lisible et vérifiable
+    if (((intptr_t)mem->cursor) %size != 0) {
+        if (size == 2 || size == 4 || size == 8 || size == 16)
+            aligned_cursor = (void*)(((intptr_t)mem->cursor) + (size - (((intptr_t)mem->cursor) %size)));
+    }
+    #endif
     // Si il y a suffisamment de mémoire disponible
-    if (mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start) >= size) {
-        void* ptr = mem->cursor;
-        mem->cursor = (void*)((intptr_t)mem->cursor + size); // On décale le curseur de la taille allouée
+    if (mem->size - ((intptr_t)aligned_cursor - (intptr_t)mem->start) >= nb_elements*size) {
+        void* ptr = aligned_cursor;
+        mem->cursor = (void*)((intptr_t)aligned_cursor + nb_elements*size); // On décale le curseur de la taille allouée
         mem->nb_alloc++;
         return ptr;
     } else {
-        //printf("Mémoire disponible: %ld. Nécessaire: %ld\n", mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start), size);
+        //printf("Mémoire disponible: %ld. Nécessaire: %ld\n", mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start), nb_elements*size);
         // Sinon on continue sur l'élément suivant de la liste
         if (!mem->next) {
-            mem->next = create_memory_block(MEMORY_BLOCK < size ? size : MEMORY_BLOCK);
+            mem->next = create_memory_block(MEMORY_BLOCK < nb_elements*size ? nb_elements*size : MEMORY_BLOCK);
        }
-        return allocate_memory(size, mem->next);
+        return allocate_memory(nb_elements, size, mem->next);
    }
 }
 
@@ -118,21 +131,21 @@ Memory* free_memory(void* ptr, Memory* mem) {
 #ifdef __CUDACC__
 extern "C"
 #endif
-void* nalloc(size_t sz) {
+void* nalloc(int nb_elements, size_t size) {
 #if defined(__CUDACC__) || defined(TEST_MEMORY_MANAGEMENT)
     pthread_mutex_lock(&memory_lock);
     if (!memory) {
         // We allocate a new memory block
-        memory = create_memory_block(MEMORY_BLOCK < sz ? sz : MEMORY_BLOCK);
+        memory = create_memory_block(MEMORY_BLOCK < nb_elements*size ? nb_elements*size : MEMORY_BLOCK);
     }
     //printf("Distinct allocations: %d Blocks: %d\n", get_distinct_allocations(memory), get_length(memory));
     //printf("Requested memory of size %ld\n", sz);
-    void* ptr = allocate_memory(sz, memory);
+    void* ptr = allocate_memory(nb_elements, size, memory);
 
     pthread_mutex_unlock(&memory_lock);
     return ptr;
 #else
-    void* ptr = malloc(sz);
+    void* ptr = malloc(size*nb_elements);
     return ptr;
 #endif
 }
@@ -69,20 +69,34 @@ Memory* create_memory_block(size_t size) {
 }
 
 
-void* allocate_memory(size_t size, Memory* mem) {
+void* allocate_memory(int nb_elements, size_t size, Memory* mem) {
+    /*
+    * cursor_aligned pointe vers le premier emplacement qui pourrait être utilisé (de manière alignée).
+    * en effet, la mémoire nécessite d'être alignée avec CUDA:
+    * https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#device-memory-accesses
+    */
+    void* aligned_cursor = mem->cursor;
+    #ifdef __CUDACC__
+    // Cela devrait être faisable avec opérateurs binaires directement, mais on préfèrera quelque chose de lisible et vérifiable
+    if (((intptr_t)mem->cursor) %size != 0) {
+        if (size == 2 || size == 4 || size == 8 || size == 16)
+            aligned_cursor = (void*)(((intptr_t)mem->cursor) + (size - (((intptr_t)mem->cursor) %size)));
+    }
+    #endif
     // Si il y a suffisamment de mémoire disponible
-    if (mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start) >= size) {
-        void* ptr = mem->cursor;
-        mem->cursor = (void*)((intptr_t)mem->cursor + size); // On décale le curseur de la taille allouée
+    if (mem->size - ((intptr_t)aligned_cursor - (intptr_t)mem->start) >= nb_elements*size) {
+        void* ptr = aligned_cursor;
+        mem->cursor = (void*)((intptr_t)aligned_cursor + nb_elements*size); // On décale le curseur de la taille allouée
         mem->nb_alloc++;
         return ptr;
     } else {
-        //printf("Mémoire disponible: %ld. Nécessaire: %ld\n", mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start), size);
+        //printf("Mémoire disponible: %ld. Nécessaire: %ld\n", mem->size - ((intptr_t)mem->cursor - (intptr_t)mem->start), nb_elements*size);
         // Sinon on continue sur l'élément suivant de la liste
         if (!mem->next) {
-            mem->next = create_memory_block(MEMORY_BLOCK < size ? size : MEMORY_BLOCK);
+            //! WARNING: May cause Infinite allocations when trying to allocate more than MEMORY_BLOCK size at once that is not naturally aligned (CUDA only)
+            mem->next = create_memory_block(MEMORY_BLOCK < nb_elements*size ? nb_elements*size : MEMORY_BLOCK);
         }
-        return allocate_memory(size, mem->next);
+        return allocate_memory(nb_elements, size, mem->next);
     }
 }
 
@@ -118,21 +132,21 @@ Memory* free_memory(void* ptr, Memory* mem) {
 #ifdef __CUDACC__
 extern "C"
 #endif
-void* nalloc(size_t sz) {
+void* nalloc(int nb_elements, size_t size) {
 #if defined(__CUDACC__) || defined(TEST_MEMORY_MANAGEMENT)
     pthread_mutex_lock(&memory_lock);
     if (!memory) {
         // We allocate a new memory block
-        memory = create_memory_block(MEMORY_BLOCK < sz ? sz : MEMORY_BLOCK);
+        memory = create_memory_block(MEMORY_BLOCK < nb_elements*size ? nb_elements*size : MEMORY_BLOCK);
     }
     //printf("Distinct allocations: %d Blocks: %d\n", get_distinct_allocations(memory), get_length(memory));
     //printf("Requested memory of size %ld\n", sz);
-    void* ptr = allocate_memory(sz, memory);
+    void* ptr = allocate_memory(nb_elements, size, memory);
 
     pthread_mutex_unlock(&memory_lock);
     return ptr;
 #else
-    void* ptr = malloc(sz);
+    void* ptr = malloc(size*nb_elements);
     return ptr;
 #endif
 }
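Aside on the extra //! WARNING line in this second copy of the file: a block created for an oversized request is sized exactly nb_elements*size, so if the fresh block's cursor still needs an alignment bump, the request no longer fits and allocate_memory() recurses into another identically sized block; this appears to be the infinite-allocation scenario the warning describes for requests above MEMORY_BLOCK that are not naturally aligned (CUDA only).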
@@ -42,11 +42,11 @@ void print_matrix(float** mat, int n, int p) {
 
 
 float*** create_matrix(int n, int p, int q, float max_val) {
-    float*** matrix = (float***)nalloc(n*sizeof(float**));
+    float*** matrix = (float***)nalloc(n, sizeof(float**));
     for (int i=0; i < n; i++) {
-        matrix[i] = (float**)nalloc(sizeof(float*)*p);
+        matrix[i] = (float**)nalloc(p, sizeof(float*));
         for (int j=0; j < p; j++) {
-            matrix[i][j] = (float*)nalloc(sizeof(float)*q);
+            matrix[i][j] = (float*)nalloc(q, sizeof(float));
         }
     }
 
@@ -56,11 +56,11 @@ float*** create_matrix(int n, int p, int q, float max_val) {
 
 
 float*** create_empty_matrix(int n, int p, int q) {
-    float*** matrix = (float***)nalloc(n*sizeof(float**));
+    float*** matrix = (float***)nalloc(n, sizeof(float**));
     for (int i=0; i < n; i++) {
-        matrix[i] = (float**)nalloc(sizeof(float*)*p);
+        matrix[i] = (float**)nalloc(p, sizeof(float*));
         for (int j=0; j < p; j++) {
-            matrix[i][j] = (float*)nalloc(sizeof(float)*q);
+            matrix[i][j] = (float*)nalloc(q, sizeof(float));
             for (int k=0; k < q; k++) {
                 matrix[i][j][k] = 0.;
             }
@@ -98,7 +98,7 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
     int k_size = input_dim - output_dim +1;
 
     // Génération des données aléatoires
-    Kernel_cnn* kernel = (Kernel_cnn*)nalloc(sizeof(Kernel_cnn));
+    Kernel_cnn* kernel = (Kernel_cnn*)nalloc(1, sizeof(Kernel_cnn));
 
     kernel->k_size = k_size;
     kernel->rows = rows;
@@ -109,8 +109,8 @@ void run_convolution_test(int input_dim, int output_dim, int rows, int columns)
     kernel->d_bias = create_matrix(kernel->columns, output_dim, output_dim, 1.5f);
 
     // weights[rows][columns][k_size][k_size]
-    kernel->weights = (float****)nalloc(sizeof(float***)*kernel->rows);
-    kernel->d_weights = (float****)nalloc(sizeof(float***)*kernel->rows);
+    kernel->weights = (float****)nalloc(kernel->rows, sizeof(float***));
+    kernel->d_weights = (float****)nalloc(kernel->rows, sizeof(float***));
     for (int i=0; i < kernel->rows; i++) {
         kernel->weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 15.0f);
         kernel->d_weights[i] = create_matrix(kernel->columns, kernel->k_size, kernel->k_size, 1.5f);
@@ -38,9 +38,9 @@ void print_matrix(float** mat, int n, int p) {
 
 
 float** create_matrix(int n, int p) {
-    float** matrix = (float**)nalloc(n*sizeof(float*));
+    float** matrix = (float**)nalloc(n, sizeof(float*));
     for (int i=0; i < n; i++) {
-        matrix[i] = (float*)nalloc(sizeof(float)*p);
+        matrix[i] = (float*)nalloc(p, sizeof(float));
     }
 
     fill_matrix_random(matrix, n, p);
@@ -49,9 +49,9 @@ float** create_matrix(int n, int p) {
 
 
 float** create_empty_matrix(int n, int p) {
-    float** matrix = (float**)nalloc(n*sizeof(float*));
+    float** matrix = (float**)nalloc(n, sizeof(float*));
     for (int i=0; i < n; i++) {
-        matrix[i] = (float*)nalloc(p*sizeof(float));
+        matrix[i] = (float*)nalloc(p, sizeof(float));
         for (int j=0; j < p; j++) {
            matrix[i][j] = 0.;
         }
@@ -14,7 +14,7 @@ int main() {
     // We pollute a little bit the memory before the tests
     int* pointeurs[N];
     for (int i=1; i < N; i++) {
-        pointeurs[i] = nalloc(i*sizeof(int));
+        pointeurs[i] = (int*)nalloc(i, sizeof(int));
         for (int j=0; j < i; j++) {
             pointeurs[i][j] = i;
         }
@@ -23,14 +23,14 @@ int main() {
     // We test in a first place that one simple allocation works as expected
     mem_used = get_memory_distinct_allocations();
     blocks_used = get_memory_blocks_number();
-    void* ptr = nalloc(15);
+    void* ptr = nalloc(15, 1);
     if (! (get_memory_distinct_allocations() <= mem_used+1)) {
-        printf_error("Plus d'un élément de mémoire alloué en une seule allocation\n");
+        printf_error((char*)"Plus d'un élément de mémoire alloué en une seule allocation\n");
         exit(1);
     }
     gree(ptr);
     if (! (get_memory_blocks_number() == blocks_used)) {
-        printf_error("La mémoire n'a pas été libérée correctement\n");
+        printf_error((char*)"La mémoire n'a pas été libérée correctement\n");
         exit(1);
     }
     printf(GREEN "OK\n" RESET);
@@ -40,10 +40,10 @@ int main() {
     printf("Allocation de deux demi-blocs\n");
     // We test that we do not use too much blocks
     blocks_used = get_memory_blocks_number();
-    void* ptr1 = nalloc(-1+MEMORY_BLOCK/2);
-    void* ptr2 = nalloc(-1+MEMORY_BLOCK/2);
+    void* ptr1 = nalloc(-1+MEMORY_BLOCK/2, 1);
+    void* ptr2 = nalloc(-1+MEMORY_BLOCK/2, 1);
     if (! (get_memory_blocks_number() <= blocks_used +1)) {
-        printf_error("Trop de blocs ont été alloués par rapport à la mémoire nécessaire\n");
+        printf_error((char*)"Trop de blocs ont été alloués par rapport à la mémoire nécessaire\n");
         exit(1);
     }
     printf(GREEN "OK\n" RESET);
@@ -62,7 +62,7 @@ int main() {
     gree(ptr1);
     gree(ptr2);
     if (! (get_memory_distinct_allocations() == 0 && get_memory_blocks_number() == 0)) {
-        printf_error("La mémoire n'a pas été libérée correctement\n");
+        printf_error((char*)"La mémoire n'a pas été libérée correctement\n");
         exit(1);
     }
     printf(GREEN "OK\n" RESET);
@@ -23,7 +23,7 @@ int main() {
     // We pollute a little bit the memory before the tests
     int* pointeurs[N];
     for (int i=1; i < N; i++) {
-        pointeurs[i] = (int*)nalloc(i*sizeof(int));
+        pointeurs[i] = (int*)nalloc(i, sizeof(int));
         for (int j=0; j < i; j++) {
             pointeurs[i][j] = i;
         }
@@ -32,7 +32,7 @@ int main() {
     // We test in a first place that one simple allocation works as expected
     mem_used = get_memory_distinct_allocations();
     blocks_used = get_memory_blocks_number();
-    void* ptr = nalloc(15);
+    void* ptr = nalloc(15, 1);
     if (! (get_memory_distinct_allocations() <= mem_used+1)) {
         printf("Plus d'un élément de mémoire alloué en une seule allocation\n");
         exit(1);
@@ -46,8 +46,8 @@ int main() {
 
 
     printf("Vérification de l'accès CUDA\n");
-    /* On lance des kernels detaille 1 ce qui est itératif synchrone
-    * Donc un peu contraire à CUDA mais l'objectif est de débugger faiclement */
+    /* On lance des kernels de taille 1 ce qui est à la fois itératif et synchrone
+    * Donc un peu contraire à CUDA mais l'objectif est de pouvoir débugger facilement */
     dim3 gridSize(1, 1, 1);
     dim3 blockSize(1, 1, 1);
 
@@ -62,8 +62,8 @@ int main() {
     printf("Allocation de deux demi-blocs\n");
     // We test that we do not use too much blocks
     blocks_used = get_memory_blocks_number();
-    void* ptr1 = nalloc(-1+MEMORY_BLOCK/2);
-    void* ptr2 = nalloc(-1+MEMORY_BLOCK/2);
+    void* ptr1 = nalloc(-1+MEMORY_BLOCK/2, 1);
+    void* ptr2 = nalloc(-1+MEMORY_BLOCK/2, 1);
     if (! (get_memory_blocks_number() <= blocks_used +1)) {
         printf("Trop de blocs ont été alloués par rapport à la mémoire nécessaire\n");
         exit(1);