Remove unused CUDA code

2025-04-22 13:23:53 +02:00 · 2022-10-21 14:22:57 +02:00 · 2022-10-21 14:22:57 +02:00 · 757d641580
commit 757d641580
parent 6ebbfda127
6 changed files with 2 additions and 200 deletions
--- a/src/mnist/cuda_utils.cu
+++ b/src/mnist/cuda_utils.cu
@ -1,54 +0,0 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include "include/mnist.h"
 /*
 * Reads an MNIST label file and returns its labels as an array of
 * unsigned int stored in device memory.
 *
 * filename: path of the label file; its first 32-bit word, once passed
 *           through swap_endian, must equal 2049.
 *
 * Returns a pointer obtained from cudaMalloc holding one unsigned int per
 * label; the caller releases it with cudaFree.
 *
 * Any failure (file cannot be opened, truncated file, wrong magic number,
 * host or device allocation failure, failed copy) prints a message and
 * terminates the process with exit(1).
 */
 unsigned int* cudaReadMnistLabels(char* filename) {
    FILE* ptr = fopen(filename, "rb");
    if (ptr == NULL) {
        printf("Unable to open file %s\n", filename);
        exit(1);
    }

    uint32_t magic_number;
    uint32_t number_of_items;

    if (fread(&magic_number, sizeof(uint32_t), 1, ptr) != 1) {
        printf("Unable to read the magic number\n");
        fclose(ptr);
        exit(1);
    }
    magic_number = swap_endian(magic_number);

    if (magic_number != 2049) {
        printf("Incorrect magic number !\n");
        fclose(ptr);
        exit(1);
    }

    if (fread(&number_of_items, sizeof(uint32_t), 1, ptr) != 1) {
        printf("Unable to read the number of items\n");
        fclose(ptr);
        exit(1);
    }
    number_of_items = swap_endian(number_of_items);

    size_t count = (size_t)number_of_items;
    size_t labels_size = sizeof(unsigned int)*count;

    // Heap buffers: the item count comes from the file, so it must not size a stack array
    unsigned char* buffer = (unsigned char*)malloc(count);
    unsigned int* labels = (unsigned int*)malloc(labels_size);
    if (count > 0 && (buffer == NULL || labels == NULL)) {
        printf("Unable to allocate memory for %u labels\n", (unsigned int)number_of_items);
        free(buffer);
        free(labels);
        fclose(ptr);
        exit(1);
    }

    size_t items_read = fread(buffer, sizeof(unsigned char), count, ptr);
    fclose(ptr);
    if (items_read != count) {
        printf("Unable to read all the labels\n");
        free(buffer);
        free(labels);
        exit(1);
    }

    // Widen each one-byte label to an unsigned int
    for (size_t i=0; i < count; i++) {
        labels[i] = (unsigned int)buffer[i];
    }
    free(buffer);

    // Size the device buffer from the element count (sizeof(labels) would only
    // be the size of a pointer) and copy from the array itself, not from the
    // address of the host pointer variable.
    unsigned int* labels_cuda = NULL;
    cudaError_t err = cudaMalloc((void**)&labels_cuda, labels_size);
    if (err == cudaSuccess) {
        err = cudaMemcpy(labels_cuda, labels, labels_size, cudaMemcpyHostToDevice);
    }
    free(labels);

    if (err != cudaSuccess) {
        printf("%s\n", cudaGetErrorString(err));
        cudaFree(labels_cuda);
        exit(1);
    }
    return labels_cuda;
 }
 /*
 * Queries the CUDA device count. If the query succeeds, prints a
 * confirmation message; otherwise prints the CUDA error string and
 * terminates the process with exit(1).
 */
 void check_cuda_compatibility() {
    int device_count;
    const cudaError_t status = cudaGetDeviceCount(&device_count);

    if (status == cudaSuccess) {
        printf("CUDA-capable device is detected\n");
        return;
    }

    // The query failed: report why and stop
    printf("%s\n", cudaGetErrorString(status));
    exit(1);
 }
--- a/src/mnist/include/cuda_utils.h
+++ b/src/mnist/include/cuda_utils.h
@ -1,22 +0,0 @@
 #include <stdio.h>
 #include <stdlib.h>
 #ifndef DEF_CUDA_UTILS_H
 #define DEF_CUDA_UTILS_H
 #include "../cuda_utils.cu"
 /*
 * "Device" refers here to the CUDA-capable GPU in use
 */
 /*
 * Reads the labels from label_file and writes them into device memory.
 * Returns the device pointer that holds the labels.
 * The process exits if the file's magic number is not the expected one.
 */
 unsigned int* cudaReadMnistLabels(char* label_file);
 /*
 * Checks that a device is available.
 * Prints the CUDA error and exits the process when the device query fails.
 */
 void check_cuda_compatibility();
 #endif
--- a/src/mnist/include/neural_network.h
+++ b/src/mnist/include/neural_network.h
@ -86,10 +86,4 @@ Network* copy_network(Network* network);
 */
 float loss_computing(Network* network, int wanted_number);
 #ifdef __CUDACC__
 /*
 * Returns a modifiable copy of the neural network
 * (only declared when __CUDACC__ is defined)
 */
 Network* copy_network_cuda(Network* network);
 #endif
 #endif
--- a/src/mnist/main.c
+++ b/src/mnist/main.c
@ -13,12 +13,6 @@
 #define EPOCHS 10
 #define BATCHES 100
 #ifdef __CUDACC__
 #   warning compiling for CUDA compatible device only
 #   include "cuda_utils.cu"
 #   define MAX_CUDA_THREADS 1024 // from NVIDIA documentation
 #endif
 /*
 * Structure donnée en argument à la fonction 'train_thread'
 */
@ -144,15 +138,8 @@ void train(int epochs, int layers, int neurons, char* recovery, char* image_file
    float accuracy;
    #ifdef __CUDACC__
    printf("Testing compatibility...\n");
    check_cuda_compatibility();
    int nb_threads = MAX_CUDA_THREADS;
    #else
    printf("Pas d'utilisation du GPU\n");
    int nb_threads = get_nprocs();
    pthread_t *tid = (pthread_t *)malloc(nb_threads * sizeof(pthread_t));
    #endif
    /*
    * On repart d'un réseau déjà créée stocké dans un fichier
@ -192,11 +179,6 @@ void train(int epochs, int layers, int neurons, char* recovery, char* image_file
    int*** images = read_mnist_images(image_file);
    unsigned int* labels = read_mnist_labels(label_file);
    #ifdef __CUDACC__
    int*** images_cuda = copy_images_cuda(images, nb_images_total, width, height);
    unsigned int* labels_cuda = copy_labels_cuda(labels);
    #endif
    if (nb_images_to_process != -1) {
        nb_images_total = nb_images_to_process;
    }
@ -224,20 +206,13 @@ void train(int epochs, int layers, int neurons, char* recovery, char* image_file
                }
                nb_remaining_images -= train_parameters[j]->nb_images;
                #ifdef __CUDACC__
                // Création des threads sur le GPU
                #else
                // Création des threads sur le CPU
                pthread_create( &tid[j], NULL, train_thread, (void*) train_parameters[j]);
                #endif
            }
            for(int j=0; j < nb_threads; j++ ) {
                #ifdef __CUDACC__
                // On join les threads créés sur le GPU
                #else
                // On join les threads créés sur le CPU
                pthread_join( tid[j], NULL );
-                #endif
+                
                accuracy += train_parameters[j]->accuracy / (float) nb_images_total;
                if (delta != NULL)
                    patch_delta(delta_network, train_parameters[j]->network, train_parameters[j]->nb_images);
@ -260,12 +235,8 @@ void train(int epochs, int layers, int neurons, char* recovery, char* image_file
        free(train_parameters[j]);
    }
    free(train_parameters);
    #ifdef __CUDACC__
    // On libère les espaces mémoires utilisés sur le GPU
    #else
    // On libère les espaces mémoire utilisés spécialement sur le CPU
    free(tid);
    #endif
 }
 float** recognize(char* modele, char* entree) {
--- a/src/mnist/neural_network.c
+++ b/src/mnist/neural_network.c
@ -371,48 +371,3 @@ float loss_computing(Network* network, int wanted_number){
    }
    return erreur;
 }
 #ifdef __CUDACC__
 /*
 * Allocates `size` bytes on the device, fills them from the host memory at
 * `src` and stores the device address in *dst.
 * Returns 1 on success; returns 0 on failure, in which case *dst is NULL.
 */
 static int clone_to_device_cuda(void** dst, const void* src, size_t size) {
    *dst = NULL;
    if (cudaMalloc(dst, size) != cudaSuccess) {
        *dst = NULL;
        return 0;
    }
    if (cudaMemcpy(*dst, src, size, cudaMemcpyHostToDevice) != cudaSuccess) {
        cudaFree(*dst);
        *dst = NULL;
        return 0;
    }
    return 1;
 }

 /*
 * Builds a copy of `network` in device memory and returns the device
 * pointer to the copied Network.
 *
 * Memory returned by cudaMalloc cannot be dereferenced from host code, so
 * every struct is first staged in a host-side local (initialised from the
 * source struct), its pointer members are replaced by device addresses, and
 * the finished struct is then transferred with cudaMemcpy.
 *
 * As in the host network, a neuron of layer i gets one weight per neuron of
 * layer i+1; neurons of the last layer get NULL weight pointers.
 *
 * Returns NULL if `network` is NULL or if any CUDA call fails. On failure
 * the device allocations already made are not released.
 */
 Network* copy_network_cuda(Network* network) {
    if (network == NULL) {
        return NULL;
    }

    const int nb_layers = network->nb_layers;

    // Host-side staging copy of the Network; its layer table lives on the device
    Network network_host = *network;
    network_host.layers = NULL;
    if (cudaMalloc((void**)&network_host.layers, (size_t)sizeof(Layer*)*nb_layers) != cudaSuccess) {
        return NULL;
    }

    for (int i=0; i < nb_layers; i++) {
        Layer* layer1 = network->layers[i];
        const int nb_neurons = layer1->nb_neurons;

        Layer layer_host = *layer1;
        layer_host.neurons = NULL;
        if (cudaMalloc((void**)&layer_host.neurons, (size_t)sizeof(Neuron*)*nb_neurons) != cudaSuccess) {
            return NULL;
        }

        for (int j=0; j < nb_neurons; j++) {
            Neuron* neuron1 = layer1->neurons[j];

            // Scalar members (bias, z, back_bias, last_back_bias) come with the struct copy
            Neuron neuron_host = *neuron1;
            neuron_host.weights = NULL;
            neuron_host.back_weights = NULL;
            neuron_host.last_back_weights = NULL;

            if (i != nb_layers-1) {
                const size_t weights_size = (size_t)sizeof(float)*network->layers[i+1]->nb_neurons;
                if (!clone_to_device_cuda((void**)&neuron_host.weights, neuron1->weights, weights_size)
                    || !clone_to_device_cuda((void**)&neuron_host.back_weights, neuron1->back_weights, weights_size)
                    || !clone_to_device_cuda((void**)&neuron_host.last_back_weights, neuron1->last_back_weights, weights_size)) {
                    return NULL;
                }
            }

            Neuron* neuron = NULL;
            if (!clone_to_device_cuda((void**)&neuron, &neuron_host, sizeof(Neuron))) {
                return NULL;
            }
            // Store the device address of the neuron in slot j of the device-side table
            if (cudaMemcpy(layer_host.neurons + j, &neuron, sizeof(Neuron*), cudaMemcpyHostToDevice) != cudaSuccess) {
                return NULL;
            }
        }

        Layer* layer = NULL;
        if (!clone_to_device_cuda((void**)&layer, &layer_host, sizeof(Layer))) {
            return NULL;
        }
        // Store the device address of the layer in slot i of the device-side table
        if (cudaMemcpy(network_host.layers + i, &layer, sizeof(Layer*), cudaMemcpyHostToDevice) != cudaSuccess) {
            return NULL;
        }
    }

    Network* network2 = NULL;
    if (!clone_to_device_cuda((void**)&network2, &network_host, sizeof(Network))) {
        return NULL;
    }
    return network2;
 }
 #endif
--- a/test/cuda_utils.cu
+++ b/test/cuda_utils.cu
@ -1,42 +0,0 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include "../src/mnist/include/cuda_utils.h"
 #define MAX_CUDA_THREADS 1024
 /*
 * gpuErrchk(ans): checks the cudaError_t produced by `ans`, tagging any
 * failure with the file and line of the call site.
 * The do { } while (0) wrapper makes the macro behave as a single statement,
 * so `if (c) gpuErrchk(x); else ...` compiles as intended.
 */
 #define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)
 /*
 * Does nothing when `code` is cudaSuccess. Otherwise prints the CUDA error
 * string together with `file` and `line` on stderr and, when `abort` is
 * true (the default), terminates the process using `code` as exit status.
 */
 inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
 {
    if (code == cudaSuccess)
    {
       return;
    }
    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort) exit(code);
 }
 /*
 * Test kernel: each thread reads labels[0] .. labels[n-1] in order and
 * discards every value. Nothing is written or returned.
 */
 __global__ void check_labels(int n, unsigned int* labels) {
    int idx = 0;
    while (idx < n) {
        (void)labels[idx];
        idx++;
    }
 }
 /*
 * Test program: checks that a CUDA device can be queried, loads the MNIST
 * test labels into device memory, runs the check_labels kernel over them
 * and reports any CUDA error raised by the launch or its execution.
 * Returns 0 when every step completes.
 */
 int main() {
    // cudaReadMnistLabels takes a non-const char*, so hand it a writable
    // array rather than a string literal
    char label_file[] = "data/mnist/t10k-labels-idx1-ubyte";

    printf("Test de la compatibilité CUDA\n");
    check_cuda_compatibility();
    printf("OK\n");

    printf("Lecture des labels\n");
    unsigned int* labels = cudaReadMnistLabels(label_file);
    printf("OK\n");

    printf("Test des labels\n");
    //! TODO: fix
    // Does not raise any error, but all labels equal 1
    // NOTE(review): 10000 is hard-coded; presumably the number of labels in the file - confirm
    check_labels<<<1, 1>>>(10000, labels);
    gpuErrchk( cudaPeekAtLastError() );    // launch errors
    gpuErrchk( cudaDeviceSynchronize() );  // errors raised while the kernel runs
    printf("OK\n");

    // Release the device buffer returned by cudaReadMnistLabels
    gpuErrchk( cudaFree(labels) );

    return 0;
 }