mirror of
https://github.com/augustin64/projet-tipe
synced 2025-01-24 15:36:25 +01:00
Remove unused CUDA code
This commit is contained in:
parent
6ebbfda127
commit
757d641580
@ -1,54 +0,0 @@
|
|||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#include "include/mnist.h"
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * Reads a label file and copies the labels into device memory.
 *
 * The file is expected to start with two 32-bit words: a magic number that
 * must equal 2049 once passed through swap_endian(), followed by the number
 * of labels. One byte per label follows. Each label is widened to an
 * unsigned int before being uploaded.
 *
 * filename: path of the label file to read.
 * returns:  device pointer to `number_of_items` unsigned ints. The caller
 *           owns the allocation and must release it with cudaFree().
 *
 * Any I/O, allocation or CUDA failure prints a message and terminates the
 * process with exit(1), like the magic-number check does.
 */
unsigned int* cudaReadMnistLabels(char* filename) {
    FILE* ptr = fopen(filename, "rb");
    if (ptr == NULL) {
        fprintf(stderr, "Unable to open label file %s\n", filename);
        exit(1);
    }

    uint32_t magic_number;
    uint32_t number_of_items;

    if (fread(&magic_number, sizeof(uint32_t), 1, ptr) != 1) {
        fprintf(stderr, "Unable to read magic number from %s\n", filename);
        fclose(ptr);
        exit(1);
    }
    magic_number = swap_endian(magic_number);

    if (magic_number != 2049) {
        printf("Incorrect magic number !\n");
        fclose(ptr);
        exit(1);
    }

    if (fread(&number_of_items, sizeof(uint32_t), 1, ptr) != 1) {
        fprintf(stderr, "Unable to read item count from %s\n", filename);
        fclose(ptr);
        exit(1);
    }
    number_of_items = swap_endian(number_of_items);

    // Heap buffers: the item count comes from the file, so a stack array
    // of that size could overflow the stack.
    unsigned char* buffer = (unsigned char*)malloc(number_of_items);
    unsigned int* labels = (unsigned int*)malloc(sizeof(unsigned int) * number_of_items);
    if (number_of_items > 0 && (buffer == NULL || labels == NULL)) {
        fprintf(stderr, "Out of host memory while reading %s\n", filename);
        free(buffer);
        free(labels);
        fclose(ptr);
        exit(1);
    }

    size_t items_read = fread(buffer, sizeof(unsigned char), number_of_items, ptr);
    fclose(ptr);
    if (items_read != number_of_items) {
        fprintf(stderr, "Truncated label file %s\n", filename);
        free(buffer);
        free(labels);
        exit(1);
    }

    for (uint32_t i = 0; i < number_of_items; i++) {
        labels[i] = (unsigned int)buffer[i];
    }
    free(buffer);

    // Size of the whole array, not sizeof(labels), which is only the size
    // of a pointer.
    size_t labels_bytes = sizeof(unsigned int) * (size_t)number_of_items;
    unsigned int* labels_cuda = NULL;

    cudaError_t err = cudaMalloc((void**)&labels_cuda, labels_bytes);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
        free(labels);
        exit(1);
    }

    // Copy from the host array itself (labels), not from the address of the
    // host pointer variable (&labels).
    err = cudaMemcpy(labels_cuda, labels, labels_bytes, cudaMemcpyHostToDevice);
    free(labels);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(err));
        cudaFree(labels_cuda);
        exit(1);
    }

    return labels_cuda;
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * Checks that at least one CUDA device is available.
 *
 * Prints a confirmation on success. If the device query fails, or succeeds
 * but reports no device, prints a diagnostic on stderr and terminates the
 * process with exit(1).
 */
void check_cuda_compatibility() {
    int nDevices = 0;
    cudaError_t err = cudaGetDeviceCount(&nDevices);

    if (err != cudaSuccess) {
        fprintf(stderr, "%s\n", cudaGetErrorString(err));
        exit(1);
    }

    // A successful query does not by itself prove that a device exists:
    // also look at the count that was returned.
    if (nDevices < 1) {
        fprintf(stderr, "No CUDA-capable device is detected\n");
        exit(1);
    }

    printf("CUDA-capable device is detected\n");
}
|
|
@ -1,22 +0,0 @@
|
|||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#ifndef DEF_CUDA_UTILS_H
|
|
||||||
#define DEF_CUDA_UTILS_H
|
|
||||||
|
|
||||||
#include "../cuda_utils.cu"
|
|
||||||
/*
|
|
||||||
* Il est entendu par "device" le GPU supportant CUDA utilisé
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Lecture des labels et écriture dans la mémoire du device
|
|
||||||
*/
|
|
||||||
unsigned int* cudaReadMnistLabels(char* label_file);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Vérification de la disponibilité d'un device
|
|
||||||
*/
|
|
||||||
void check_cuda_compatibility();
|
|
||||||
|
|
||||||
#endif
|
|
@ -86,10 +86,4 @@ Network* copy_network(Network* network);
|
|||||||
*/
|
*/
|
||||||
float loss_computing(Network* network, int wanted_number);
|
float loss_computing(Network* network, int wanted_number);
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
|
||||||
/*
|
|
||||||
* Renvoie une copie modifiable du réseau de neurones
|
|
||||||
*/
|
|
||||||
Network* copy_network_cuda(Network* network);
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -13,12 +13,6 @@
|
|||||||
#define EPOCHS 10
|
#define EPOCHS 10
|
||||||
#define BATCHES 100
|
#define BATCHES 100
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
|
||||||
# warning compiling for CUDA compatible device only
|
|
||||||
# include "cuda_utils.cu"
|
|
||||||
# define MAX_CUDA_THREADS 1024 // from NVIDIA documentation
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Structure donnée en argument à la fonction 'train_thread'
|
* Structure donnée en argument à la fonction 'train_thread'
|
||||||
*/
|
*/
|
||||||
@ -144,15 +138,8 @@ void train(int epochs, int layers, int neurons, char* recovery, char* image_file
|
|||||||
|
|
||||||
float accuracy;
|
float accuracy;
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
|
||||||
printf("Testing compatibility...\n");
|
|
||||||
check_cuda_compatibility();
|
|
||||||
int nb_threads = MAX_CUDA_THREADS;
|
|
||||||
#else
|
|
||||||
printf("Pas d'utilisation du GPU\n");
|
|
||||||
int nb_threads = get_nprocs();
|
int nb_threads = get_nprocs();
|
||||||
pthread_t *tid = (pthread_t *)malloc(nb_threads * sizeof(pthread_t));
|
pthread_t *tid = (pthread_t *)malloc(nb_threads * sizeof(pthread_t));
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* On repart d'un réseau déjà créée stocké dans un fichier
|
* On repart d'un réseau déjà créée stocké dans un fichier
|
||||||
@ -192,11 +179,6 @@ void train(int epochs, int layers, int neurons, char* recovery, char* image_file
|
|||||||
int*** images = read_mnist_images(image_file);
|
int*** images = read_mnist_images(image_file);
|
||||||
unsigned int* labels = read_mnist_labels(label_file);
|
unsigned int* labels = read_mnist_labels(label_file);
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
|
||||||
int*** images_cuda = copy_images_cuda(images, nb_images_total, width, height);
|
|
||||||
unsigned int* labels_cuda = copy_labels_cuda(labels);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (nb_images_to_process != -1) {
|
if (nb_images_to_process != -1) {
|
||||||
nb_images_total = nb_images_to_process;
|
nb_images_total = nb_images_to_process;
|
||||||
}
|
}
|
||||||
@ -224,20 +206,13 @@ void train(int epochs, int layers, int neurons, char* recovery, char* image_file
|
|||||||
}
|
}
|
||||||
nb_remaining_images -= train_parameters[j]->nb_images;
|
nb_remaining_images -= train_parameters[j]->nb_images;
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
|
||||||
// Création des threads sur le GPU
|
|
||||||
#else
|
|
||||||
// Création des threads sur le CPU
|
// Création des threads sur le CPU
|
||||||
pthread_create( &tid[j], NULL, train_thread, (void*) train_parameters[j]);
|
pthread_create( &tid[j], NULL, train_thread, (void*) train_parameters[j]);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
for(int j=0; j < nb_threads; j++ ) {
|
for(int j=0; j < nb_threads; j++ ) {
|
||||||
#ifdef __CUDACC__
|
|
||||||
// On join les threads créés sur le GPU
|
|
||||||
#else
|
|
||||||
// On join les threads créés sur le CPU
|
// On join les threads créés sur le CPU
|
||||||
pthread_join( tid[j], NULL );
|
pthread_join( tid[j], NULL );
|
||||||
#endif
|
|
||||||
accuracy += train_parameters[j]->accuracy / (float) nb_images_total;
|
accuracy += train_parameters[j]->accuracy / (float) nb_images_total;
|
||||||
if (delta != NULL)
|
if (delta != NULL)
|
||||||
patch_delta(delta_network, train_parameters[j]->network, train_parameters[j]->nb_images);
|
patch_delta(delta_network, train_parameters[j]->network, train_parameters[j]->nb_images);
|
||||||
@ -260,12 +235,8 @@ void train(int epochs, int layers, int neurons, char* recovery, char* image_file
|
|||||||
free(train_parameters[j]);
|
free(train_parameters[j]);
|
||||||
}
|
}
|
||||||
free(train_parameters);
|
free(train_parameters);
|
||||||
#ifdef __CUDACC__
|
|
||||||
// On libère les espaces mémoires utilisés sur le GPU
|
|
||||||
#else
|
|
||||||
// On libère les espaces mémoire utilisés spécialement sur le CPU
|
// On libère les espaces mémoire utilisés spécialement sur le CPU
|
||||||
free(tid);
|
free(tid);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
float** recognize(char* modele, char* entree) {
|
float** recognize(char* modele, char* entree) {
|
||||||
|
@ -371,48 +371,3 @@ float loss_computing(Network* network, int wanted_number){
|
|||||||
}
|
}
|
||||||
return erreur;
|
return erreur;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
|
||||||
|
|
||||||
/*
 * Allocates `bytes` of device memory, fills it from the host buffer `src`
 * and stores the device address in *dst. Returns false (leaving nothing
 * allocated) if either CUDA call fails.
 */
static bool cuda_clone_buffer(void** dst, const void* src, size_t bytes) {
    *dst = NULL;
    if (cudaMalloc(dst, bytes) != cudaSuccess) {
        return false;
    }
    if (cudaMemcpy(*dst, src, bytes, cudaMemcpyHostToDevice) != cudaSuccess) {
        cudaFree(*dst);
        *dst = NULL;
        return false;
    }
    return true;
}

/*
 * Builds a copy of a host-side network in device memory.
 *
 * Device memory cannot be dereferenced from host code, so every structure is
 * first assembled in a host-side staging variable (holding device addresses
 * in its pointer fields) and then uploaded with cudaMemcpy.
 *
 * For each neuron, bias, z, back_bias and last_back_bias are copied. For
 * every layer except the last, weights, back_weights and last_back_weights
 * are copied too, each holding as many floats as the next layer has neurons.
 * Any other field of the staged structures is zero-initialised, so the
 * weight pointers of last-layer neurons are NULL on the device.
 *
 * network: host network to copy; must be fully readable from the host.
 * returns: device pointer to the copied Network, or NULL if any CUDA call
 *          fails.
 *
 * NOTE(review): on failure, device memory already allocated for the partial
 * copy is not released; add cleanup if callers need to recover from errors.
 */
Network* copy_network_cuda(Network* network) {
    const int nb_layers = network->nb_layers;
    Layer** d_layers = NULL;

    if (cudaMalloc((void**)&d_layers, sizeof(Layer*) * nb_layers) != cudaSuccess) {
        return NULL;
    }

    for (int i = 0; i < nb_layers; i++) {
        Layer* src_layer = network->layers[i];
        const bool has_next_layer = (i != nb_layers - 1);
        Neuron** d_neurons = NULL;

        if (cudaMalloc((void**)&d_neurons, sizeof(Neuron*) * src_layer->nb_neurons) != cudaSuccess) {
            return NULL;
        }

        for (int j = 0; j < src_layer->nb_neurons; j++) {
            Neuron* src_neuron = src_layer->neurons[j];
            Neuron staged_neuron = {};

            staged_neuron.bias = src_neuron->bias;
            staged_neuron.z = src_neuron->z;
            staged_neuron.back_bias = src_neuron->back_bias;
            staged_neuron.last_back_bias = src_neuron->last_back_bias;

            if (has_next_layer) {
                // One weight per neuron of the following layer.
                size_t weights_bytes = sizeof(float) * network->layers[i+1]->nb_neurons;

                if (!cuda_clone_buffer((void**)&staged_neuron.weights, src_neuron->weights, weights_bytes)
                    || !cuda_clone_buffer((void**)&staged_neuron.back_weights, src_neuron->back_weights, weights_bytes)
                    || !cuda_clone_buffer((void**)&staged_neuron.last_back_weights, src_neuron->last_back_weights, weights_bytes)) {
                    return NULL;
                }
            }

            Neuron* d_neuron = NULL;
            if (!cuda_clone_buffer((void**)&d_neuron, &staged_neuron, sizeof(Neuron))) {
                return NULL;
            }
            // d_neurons + j is only address arithmetic; the store itself is
            // done by cudaMemcpy on the device side.
            if (cudaMemcpy(d_neurons + j, &d_neuron, sizeof(Neuron*), cudaMemcpyHostToDevice) != cudaSuccess) {
                return NULL;
            }
        }

        Layer staged_layer = {};
        staged_layer.nb_neurons = src_layer->nb_neurons;
        staged_layer.neurons = d_neurons;

        Layer* d_layer = NULL;
        if (!cuda_clone_buffer((void**)&d_layer, &staged_layer, sizeof(Layer))) {
            return NULL;
        }
        if (cudaMemcpy(d_layers + i, &d_layer, sizeof(Layer*), cudaMemcpyHostToDevice) != cudaSuccess) {
            return NULL;
        }
    }

    Network staged_network = {};
    staged_network.nb_layers = nb_layers;
    staged_network.layers = d_layers;

    Network* d_network = NULL;
    if (!cuda_clone_buffer((void**)&d_network, &staged_network, sizeof(Network))) {
        return NULL;
    }
    return d_network;
}
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,42 +0,0 @@
|
|||||||
#include <stdlib.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
#include "../src/mnist/include/cuda_utils.h"
|
|
||||||
#define MAX_CUDA_THREADS 1024
|
|
||||||
|
|
||||||
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
|
|
||||||
/*
 * Reports a failed CUDA call.
 *
 * code:  status returned by the CUDA call being checked.
 * file:  source file of the call site, printed in the message.
 * line:  source line of the call site, printed in the message.
 * abort: when true (the default), the process exits with `code` as its
 *        exit status after the message is printed.
 *
 * Does nothing when `code` is cudaSuccess.
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code == cudaSuccess) {
        return;
    }

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);

    if (!abort) {
        return;
    }
    exit(code);
}
|
|
||||||
|
|
||||||
/*
 * Kernel that walks the first n entries of `labels`, touching each one and
 * discarding the value. Every thread that runs it performs the whole walk
 * on its own; nothing is written.
 *
 * n:      number of entries to visit.
 * labels: array of at least n entries (presumably a device pointer, since
 *         it is read from kernel code; verify against the caller).
 */
__global__ void check_labels(int n, unsigned int* labels) {
    int idx = 0;
    while (idx < n) {
        (void)labels[idx];
        idx++;
    }
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * Smoke test: checks that a CUDA device is usable, loads the label file to
 * the device, then runs check_labels over it and reports any launch or
 * execution error through gpuErrchk.
 */
int main() {
    printf("Test de la compatibilité CUDA\n");
    check_cuda_compatibility();
    printf("OK\n");

    printf("Lecture des labels\n");
    // cudaReadMnistLabels takes a non-const char*; use a writable array
    // rather than passing a string literal to it.
    char label_file[] = "data/mnist/t10k-labels-idx1-ubyte";
    unsigned int* labels = cudaReadMnistLabels(label_file);
    printf("OK\n");

    printf("Test des labels\n");
    //! TODO: fix
    // Raises no error, but all the labels are equal to 1
    check_labels<<<1, 1>>>(10000, labels);
    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );
    printf("OK\n");

    // Release the device buffer returned by cudaReadMnistLabels.
    gpuErrchk( cudaFree(labels) );

    return 0;
}
|
|
Loading…
Reference in New Issue
Block a user