diff --git a/test/cnn_function.cu b/test/cnn_function.cu index 54b7fd3..bc5649b 100644 --- a/test/cnn_function.cu +++ b/test/cnn_function.cu @@ -7,17 +7,25 @@ #include "../src/include/colors.h" #include "../src/include/utils.h" +#include "../src/cnn/include/config.h" -int main() { - printf("Checking CUDA compatibility.\n"); - bool cuda_compatible = check_cuda_compatibility(); - if (!cuda_compatible) { - printf(RED "CUDA not compatible, skipping tests.\n" RESET); - return 0; +__global__ void local_kernel(funcPtr f, float*** input, int depth, int rows, int columns) { + // Respective equivalents of i, j and k in the loop performed by the CPU: each thread handles one input[idx][idy][idz] + int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth + int idy = threadIdx.y + blockDim.y*blockIdx.y; // < rows + int idz = threadIdx.z + blockDim.z*blockIdx.z; // < columns + + if (idx >= depth || idy >= rows || idz >= columns) { + return; } - printf(GREEN "OK\n" RESET); - printf("Initialisation OK\n"); + input[idx][idy][idz] = (*f)(input[idx][idy][idz]); +} + + +void test1(int activation, bool use_local_kernel) { + printf("Test sur la fonction %d\n", activation); + printf("\tInitialisation OK\n"); // Initialise values int depth = 10; int rows = 10; @@ -32,27 +40,40 @@ int main() { input[i][j] = (float*)nalloc(columns, sizeof(float)); input_initial[i][j] = (float*)malloc(columns*sizeof(float)); for (int k=0; k < columns; k++) { - input[i][j][k] = rand()/RAND_MAX; + input[i][j][k] = rand()/(float)RAND_MAX; input_initial[i][j][k] = input[i][j][k]; } } } - printf(GREEN "OK\n" RESET); + printf("\t" GREEN "OK\n" RESET); - funcPtr func = get_activation_function(TANH); + funcPtr func_cpu = get_activation_function(activation); - printf("Calcul par CUDA\n"); - apply_function_input(TANH, input, depth, rows, columns); - printf(GREEN "OK\n" RESET); + if (!use_local_kernel) { + printf("\tCalcul par CUDA\n"); + apply_function_input(activation, input, depth, rows, columns); + } else { + printf("\tCalcul par CUDA sur le kernel 
local\n"); + dim3 gridSize(i_div_up(depth, BLOCKSIZE_x), i_div_up(rows, BLOCKSIZE_y), i_div_up(columns, BLOCKSIZE_z)); + dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z); - printf("Vérification des résultats\n"); + funcPtr function_cuda = get_activation_function_cuda(activation); + + local_kernel<<<gridSize, blockSize>>>(function_cuda, input, depth, rows, columns); + + gpuErrchk( cudaPeekAtLastError() ); + gpuErrchk( cudaDeviceSynchronize() ); + } + printf("\t" GREEN "OK\n" RESET); + + printf("\tVérification des résultats\n"); for (int i=0; i < depth; i++) { for (int j=0; j < rows; j++) { for (int k=0; k < columns; k++) { - if (fabs((*func)(input_initial[i][j][k]) - input[i][j][k]) > 1e-6) { + if (fabs((*func_cpu)(input_initial[i][j][k]) - input[i][j][k]) > 1e-6) { printf_error((char*)"Les résultats ne coincident pas\n"); - printf("Différence %e\n", fabs((*func)(input_initial[i][j][k]) - input[i][j][k])); - //exit(1); + printf("Différence %e\n", fabs((*func_cpu)(input_initial[i][j][k]) - input[i][j][k])); + exit(1); } } gree(input[i][j]); @@ -64,6 +85,26 @@ int main() { gree(input); free(input_initial); + printf("\t" GREEN "OK\n" RESET); printf(GREEN "OK\n" RESET); +} + +int main() { + printf("Checking CUDA compatibility.\n"); + bool cuda_compatible = check_cuda_compatibility(); + if (!cuda_compatible) { + printf(RED "CUDA not compatible, skipping tests.\n" RESET); + return 0; + } + printf(GREEN "OK\n" RESET); + + for (int i=1; i < 7; i++) { + if (i != 5) { // Exclude SOFTMAX + test1(i, false); // use function i + test1(-i, false); // use function i' + test1(i, true); // use function i in the kernel declared in this file + test1(-i, true); // use function i' in the kernel declared in this file + } + } return 0; } \ No newline at end of file