From 9c560ef534d146ca7f55aa3c8a2fe79cb3df95bb Mon Sep 17 00:00:00 2001 From: augustin64 Date: Fri, 14 Oct 2022 18:17:29 +0200 Subject: [PATCH] Add matrix_multiplication test --- src/cnn/matrix_multiplication.cu | 80 +------------------- test/matrix_multiplication.cu | 123 +++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 79 deletions(-) create mode 100644 test/matrix_multiplication.cu diff --git a/src/cnn/matrix_multiplication.cu b/src/cnn/matrix_multiplication.cu index 985b45d..671eb6c 100644 --- a/src/cnn/matrix_multiplication.cu +++ b/src/cnn/matrix_multiplication.cu @@ -1,7 +1,6 @@ #include #include #include -#include #define BLOCKSIZE_x 16 #define BLOCKSIZE_y 16 @@ -17,54 +16,6 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t } #endif -float random_float(float low, float high) { - float t = (float)rand() / (float)RAND_MAX; - return (1.0f - t) * low + t * high; -} - - -void fill_matrix_random(float **matrix, int n, int p) { - for (int i=0; i < n; i++) { - for (int j=0; j < p; j++) { - matrix[i][j] = random_float(0.0f, 15.0f); - } - } -} - - -void print_matrix(float** mat, int n, int p) { - for (int i=0; i < n; i++) { - printf("[\t"); - for (int j=0; j < p; j++) { - printf("%0.1f\t", mat[i][j]); - } - printf("]\n"); - } -} - - -float** create_matrix(int n, int p) { - float** matrix = (float**)malloc(n*sizeof(float*)); - for (int i=0; i < n; i++) { - matrix[i] = (float*)malloc(sizeof(float)*p); - } - - fill_matrix_random(matrix, n, p); - return matrix; -} - - -float** create_empty_matrix(int n, int p) { - float** matrix = (float**)malloc(n*sizeof(float*)); - for (int i=0; i < n; i++) { - matrix[i] = (float*)malloc(p*sizeof(float)); - for (int j=0; j < p; j++) { - matrix[i][j] = 0.; - } - } - return matrix; -} - #ifdef __CUDACC__ int i_div_up(int hostPtr, int b){ @@ -169,7 +120,7 @@ void matrix_multiplication_host(float** m1, float** m2, float** result, int n, i for (int j=0; j < q; j++) { result[i][j] = 0.; for 
(int k=0; k < p; k++) { - result[i][j] += m1[i][k] + m2[k][j]; + result[i][j] += m1[i][k] * m2[k][j]; } } } @@ -186,33 +137,4 @@ void matrix_multiplication(float** m1, float** m2, float** result, int n, int p, #else matrix_multiplication_host(m1, m2, result, n, p, q); #endif -} - - -int main() { - srand(time(NULL)); - int n = 3; - int p = 3; - int q = 3; - float** matrix1 = create_matrix(n, p); - float** matrix2 = create_matrix(p, q); - float** result = create_empty_matrix(n, q); - - clock_t start, end; - double cpu_time_used; - - start = clock(); - matrix_multiplication(matrix1, matrix2, result, n, p, q, check_cuda_compatibility()); - end = clock(); - - cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; - printf("Time used: %lf seconds\n", cpu_time_used); - - print_matrix(matrix1, n, p); - printf("\n"); - print_matrix(matrix2, p, q); - printf("\n"); - print_matrix(result, n, q); - - return 0; } \ No newline at end of file diff --git a/test/matrix_multiplication.cu b/test/matrix_multiplication.cu new file mode 100644 index 0000000..6f27547 --- /dev/null +++ b/test/matrix_multiplication.cu @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include + +#include "../src/cnn/matrix_multiplication.cu" + + +float random_float(float low, float high) { + float t = (float)rand() / (float)RAND_MAX; + return (1.0f - t) * low + t * high; +} + + +void fill_matrix_random(float **matrix, int n, int p) { + for (int i=0; i < n; i++) { + for (int j=0; j < p; j++) { + matrix[i][j] = random_float(0.0f, 15.0f); + } + } +} + + +void print_matrix(float** mat, int n, int p) { + for (int i=0; i < n; i++) { + printf("[\t"); + for (int j=0; j < p; j++) { + printf("%0.1f\t", mat[i][j]); + } + printf("]\n"); + } +} + + +float** create_matrix(int n, int p) { + float** matrix = (float**)malloc(n*sizeof(float*)); + for (int i=0; i < n; i++) { + matrix[i] = (float*)malloc(sizeof(float)*p); + } + + fill_matrix_random(matrix, n, p); + return matrix; +} + + +float** 
create_empty_matrix(int n, int p) { + float** matrix = (float**)malloc(n*sizeof(float*)); + for (int i=0; i < n; i++) { + matrix[i] = (float*)malloc(p*sizeof(float)); + for (int j=0; j < p; j++) { + matrix[i][j] = 0.; + } + } + return matrix; +} + + +bool check_matrices_equality(float** m1, float** m2, int n, int p) { + for (int i=0; i < n; i++) { + for (int j=0; j < p; j++) { + if (fabs(m1[i][j] - m2[i][j]) > 0.001) { + return false; + } + } + } + return true; +} + + +int main() { /* Multiplies two random matrices on GPU and on CPU and compares the results; returns 1 on mismatch, 0 otherwise (also 0 when CUDA is unavailable and the test is skipped). */ + clock_t start, end; + double cpu_time_used; + + printf("Checking CUDA compatibility.\n"); + bool cuda_compatible = check_cuda_compatibility(); + if (!cuda_compatible) { + printf("CUDA not compatible, skipping tests.\n"); + return 0; + } + printf("OK\n"); + + + printf("Generating matrices.\n"); + srand(time(NULL)); + int n = 3; + int p = 3; + int q = 3; + float** matrix1 = create_matrix(n, p); + float** matrix2 = create_matrix(p, q); + float** result_gpu = create_empty_matrix(n, q); + float** result_cpu = create_empty_matrix(n, q); + printf("OK\n"); + + + printf("Computing on GPU.\n"); + start = clock(); + matrix_multiplication_device(matrix1, matrix2, result_gpu, n, p, q); + end = clock(); + + cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; + printf("Time used for GPU: %lf seconds\n", cpu_time_used); + printf("OK\n"); + + + printf("Computing on CPU.\n"); + start = clock(); + matrix_multiplication_host(matrix1, matrix2, result_cpu, n, p, q); /* CPU reference must go into result_cpu: writing it into result_gpu would overwrite the GPU output and leave result_cpu all zeros for the comparison below. */ + end = clock(); + + cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; + printf("Time used for CPU: %lf seconds\n", cpu_time_used); + printf("OK\n"); + + + printf("Checking equality.\n"); + if (!check_matrices_equality(result_gpu, result_cpu, n, q)) { + return 1; + } + printf("OK\n"); + + return 0; +} \ No newline at end of file