tipe/test/cnn_function.cu

#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include "include/test.h"
#include "../src/common/include/memory_management.h"
#include "../src/common/include/colors.h"
#include "../src/common/include/utils.h"
#include "../src/cnn/include/function.h"
#include "../src/cnn/include/config.h"
__global__ void local_kernel(funcPtr f, float*** input, int depth, int rows, int columns) {
    // Respective equivalents of i, j and k in the loop performed by the CPU
    int idx = threadIdx.x + blockDim.x*blockIdx.x; // < depth
    int idy = threadIdx.y + blockDim.y*blockIdx.y; // < rows
    int idz = threadIdx.z + blockDim.z*blockIdx.z; // < columns

    if (idx >= depth || idy >= rows || idz >= columns) {
        return;
    }

    input[idx][idy][idz] = (*f)(input[idx][idy][idz]);
}
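
// Fills a depth*rows*columns tensor with random values (keeping a copy in
// input_initial), applies activation number `activation` to it on the GPU,
// either through apply_function_input or through the local kernel above,
// then checks every cell against the CPU version of the same activation.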
void test1(int activation, bool use_local_kernel) {
    printf("Testing function %d\n", activation);

    // Initialise values
    int depth = 10;
    int rows = 10;
    int columns = 10;

    float*** input = (float***)nalloc(depth, sizeof(float**));
    float*** input_initial = (float***)malloc(depth*sizeof(float**));
    for (int i=0; i < depth; i++) {
        input[i] = (float**)nalloc(rows, sizeof(float*));
        input_initial[i] = (float**)malloc(rows*sizeof(float*));

        for (int j=0; j < rows; j++) {
            input[i][j] = (float*)nalloc(columns, sizeof(float));
            input_initial[i][j] = (float*)malloc(columns*sizeof(float));

            for (int k=0; k < columns; k++) {
                input[i][j][k] = rand()/(float)RAND_MAX;
                input_initial[i][j][k] = input[i][j][k];
            }
        }
    }
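
    // CPU implementation of the same activation, used below as the reference result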
    funcPtr func_cpu = get_activation_function(activation);

    if (!use_local_kernel) {
        printf("\tComputing with CUDA\n");
        apply_function_input(activation, input, depth, rows, columns);
    } else {
        printf("\tComputing with CUDA using the local kernel\n");
        dim3 gridSize(i_div_up(depth, BLOCKSIZE_x), i_div_up(rows, BLOCKSIZE_y), i_div_up(columns, BLOCKSIZE_z));
        dim3 blockSize(BLOCKSIZE_x, BLOCKSIZE_y, BLOCKSIZE_z);

        funcPtr function_cuda = get_activation_function_cuda(activation);

        local_kernel<<<gridSize, blockSize>>>(function_cuda, input, depth, rows, columns);
        gpuErrchk( cudaPeekAtLastError() );
        gpuErrchk( cudaDeviceSynchronize() );
    }
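
    // Compare the GPU output with the CPU reference cell by cell, then free both tensors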
    for (int i=0; i < depth; i++) {
        for (int j=0; j < rows; j++) {
            for (int k=0; k < columns; k++) {
                if (fabs((*func_cpu)(input_initial[i][j][k]) - input[i][j][k]) > 1e-6) {
                    _TEST_ASSERT(false, "Results match");
                    //printf("Difference %e\n", fabs((*func_cpu)(input_initial[i][j][k]) - input[i][j][k]));
                }
            }
            gree(input[i][j], false);
            free(input_initial[i][j]);
        }
        gree(input[i], false);
        free(input_initial[i]);
    }
    gree(input, false);
    free(input_initial);
}
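
// Test driver: runs test1 for each activation identifier i (and its i'
// counterpart, selected with a negative index), both through
// apply_function_input and through the local kernel.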
int main() {
    _TEST_PRESENTATION("CUDA functions");

    printf("Checking CUDA compatibility.\n");
    bool cuda_compatible = cuda_setup(true);
    if (!cuda_compatible) {
        printf(RED "CUDA not compatible, skipping tests.\n" RESET);
        return 0;
    }
    printf(GREEN "OK\n" RESET);
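
    // Activation identifiers run from 1 to 6; SOFTMAX (5) is skipped since it
    // is not applied element-wise, so this per-cell test does not cover it.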
    for (int i=1; i < 7; i++) {
        if (i != 5) { // Exclude SOFTMAX
            test1(i, false);  // use function i
            test1(-i, false); // use function i'
            test1(i, true);   // use function i in the kernel declared in this file
            test1(-i, true);  // use function i' in the kernel declared in this file
        }
    }
    return 0;
}