From 4839872c9b38f3b9b4f3981733be67d56c6a16e1 Mon Sep 17 00:00:00 2001 From: augustin64 Date: Fri, 14 Oct 2022 16:30:28 +0200 Subject: [PATCH] Fix bad use of cudaMemcpy --- src/cnn/matrix_multiplication.cu | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/cnn/matrix_multiplication.cu b/src/cnn/matrix_multiplication.cu index a1819df..61394dc 100644 --- a/src/cnn/matrix_multiplication.cu +++ b/src/cnn/matrix_multiplication.cu @@ -102,10 +102,14 @@ void matrix_multiplication(float** m1, float** m2, float** result, int n, int p, float* result_dev; gpuErrchk( cudaMallocPitch((void**)&m1_dev, &pitch_m1_dev, p * sizeof(float), n)); - gpuErrchk( cudaMemcpy2D(m1_dev, pitch_m1_dev, &m1, p*sizeof(float), p* sizeof(float), n, cudaMemcpyHostToDevice)); + for (int i=0; i < n; i++) { + gpuErrchk( cudaMemcpy2D((void*)((char*)m1_dev + i*pitch_m1_dev), pitch_m1_dev, (const void*)&(m1[i][0]), p*sizeof(float), p*sizeof(float), 1, cudaMemcpyHostToDevice)); + } gpuErrchk( cudaMallocPitch((void**)&m2_dev, &pitch_m2_dev, q * sizeof(float), p)); - gpuErrchk( cudaMemcpy2D(m2_dev, pitch_m2_dev, &m2, q*sizeof(float), q* sizeof(float), p, cudaMemcpyHostToDevice)); + for (int i=0; i < p; i++) { + gpuErrchk( cudaMemcpy2D((void*)((char*)m2_dev + i*pitch_m2_dev), pitch_m2_dev, (const void*)&(m2[i][0]), q*sizeof(float), q*sizeof(float), 1, cudaMemcpyHostToDevice)); + } gpuErrchk( cudaMallocPitch((void**)&result_dev, &pitch_result_dev, q * sizeof(float), n)); @@ -118,7 +122,7 @@ void matrix_multiplication(float** m1, float** m2, float** result, int n, int p, gpuErrchk( cudaDeviceSynchronize() ); // Post-traitement - for (int i=0; i < q; i++) { + for (int i=0; i < n; i++) { gpuErrchk( cudaMemcpy2D((void*)&(result[i][0]), q*sizeof(float), (const void*)((char*)result_dev + i*pitch_result_dev), pitch_result_dev, sizeof(float)*q, 1, cudaMemcpyDeviceToHost)); }