Ce serveur Gitlab sera éteint le 30 juin 2020, pensez à migrer vos projets vers les serveurs gitlab-research.centralesupelec.fr et gitlab-student.centralesupelec.fr !

MnistExperiences.py 3.93 KB
Newer Older
Ngocson's avatar
Ngocson committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
# -*- coding: utf-8 -*-

from imports import *
# Run-wide settings.
Num = '1'      # run tag inserted in the name of every CSV file written by this script
Ncluster = 10  # number of clusters / mixture components requested from the clusterers

# Dimensionality-reduction estimators (presumably scikit-learn classes that
# are re-exported by the local `imports` module -- confirm there).
pca = PCA(18)
ica = FastICA(max_iter=1000, tol=1e-3)
rp = random_projection.GaussianRandomProjection(eps=0.5, n_components=500)
nmf = NMF()

# Each entry is (estimator, label); the label is used in log messages and
# as a prefix of the output file names.
algos = [
    (pca, "PCA"),
    (ica, "ICA"),
    (rp, "RandomProjection"),
    (nmf, "NMF"),
]

# Clustering estimators, paired with their labels in the same way.
# NOTE(review): the `n_jobs` argument of KMeans was deprecated in
# scikit-learn 0.23 and removed in 1.0 -- confirm the installed version
# still accepts it before running.
em = GaussianMixture(verbose=2, n_components=Ncluster)
km = KMeans(n_jobs=-1, init='k-means++', n_clusters=Ncluster)

clusteringAglos = [
    (km, "KMeans"),
    (em, "ExpectationMaximization"),
]
#TODO:
# Random projection
# choosing the fourth feature reduction algorithm

# Step 1: read the four CSV files (test/train data and labels) of each
# dataset from the DATA/ directory, in the order test data, test labels,
# train data, train labels.
print("Step 1:\n Load data:")

WineTestData, WineTestLabel, WineTrainData, WineTrainLabel = (
    genfromtxt(csvPath, delimiter=',')
    for csvPath in (
        'DATA/WineTestData.csv',
        'DATA/WineTestLabel.csv',
        'DATA/WineTrainData.csv',
        'DATA/WineTrainLabel.csv',
    )
)
print("Wine: Loaded")

MNISTtrainData, MNISTtrainLabel, MNISTtestData, MNISTtestLabel = (
    genfromtxt(csvPath, delimiter=',')
    for csvPath in (
        'DATA/MNISTtrainData.csv',
        'DATA/MNISTtrainLabel.csv',
        'DATA/MNISTtestData.csv',
        'DATA/MNISTtestLabel.csv',
    )
)
print("MNIST: loaded")

# Tuple layout relied on by the later steps:
#   (name, test data, train data, test labels, train labels)
datasets = [
    ("Wine", WineTestData, WineTrainData, WineTestLabel, WineTrainLabel),
    ("MNIST", MNISTtestData, MNISTtrainData, MNISTtestLabel, MNISTtrainLabel),
]
#Todo once:
#Write the data 

# Step 2: fit every dimensionality-reduction algorithm on the training split
# of every dataset, project both splits with the fitted model, dump the two
# projections to CSV and keep them in memory for the clustering of Step 4.
#
# Entries appended to transformedDatasets use the same layout as `datasets`:
#   (name, reduced test data, reduced train data, test labels, train labels)
# where name is the reduction label followed by the dataset name, so that
# every (reduction, dataset) pair is distinguishable downstream.
transformedDatasets = []

print("Step 2: \nFeature reduction:")
for algo in algos:
    for dataset in datasets:
        print("Fitting the reduction of dimensionality "+algo[1]+" on the dataset "+dataset[0])
        # The model is fitted on the training split only (index 2).
        algo[0].fit(dataset[2])
        print("done\nTransforming the data")
        # Project the *test* split (index 1). The previous code transformed
        # the training split here, so the "tst" output was a copy of "tr".
        transformedDatasetTst = algo[0].transform(dataset[1])

        print(" writing the data")
        # newline='' is required by the csv module; without it an extra
        # '\r' is written on platforms that use CRLF line endings.
        with open(algo[1]+dataset[0]+'tst'+Num+'.csv', 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            # One CSV row per sample.
            writer.writerows(transformedDatasetTst)
        transformedDatasetTr = algo[0].transform(dataset[2])

        print(" writing the data")
        with open(algo[1]+dataset[0]+'tr'+Num+'.csv', 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            writer.writerows(transformedDatasetTr)

        # Store the reduction label in the name: with the bare dataset name,
        # the prediction files written later for different reductions (and
        # for the non-reduced data) would all share one file name and
        # overwrite each other.
        transformedDatasets.append((algo[1]+dataset[0],transformedDatasetTst,transformedDatasetTr,dataset[3],dataset[4]))
        print("done")

# Step 3: baseline -- run every clustering algorithm on the original
# (non-reduced) features and write the test-split predictions to
# <clustering label><dataset name>prediction<Num>.csv.
print("Step 3: \nApplying the clustering algorithm on the non-reduced data:")

for algo in clusteringAglos:
    for dataset in datasets:
        print("applying the aglo "+algo[1]+" on the dataset "+dataset[0])
        # Fit on the training split (index 2 of the dataset tuple) ...
        algo[0].fit(dataset[2])
        print("training done")

        # ... and predict a cluster for every sample of the test split (index 1).
        Prediction = algo[0].predict(dataset[1])
        print(" writing the data")

        # newline='' is required by the csv module; without it an extra
        # '\r' is written on platforms that use CRLF line endings.
        with open(algo[1]+dataset[0]+'prediction'+Num+'.csv', 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            # All predictions are written on a single CSV row.
            writer.writerow(Prediction)
       
# Step 4: run every clustering algorithm on the reduced datasets produced by
# the feature-reduction step and write the test-split predictions to
# <clustering label><dataset name>prediction<Num>.csv.
#
# NOTE(review): the file name depends only on the clustering label and on
# dataset[0]; entries of transformedDatasets that share the same dataset[0]
# write to the same file -- verify that the stored names are unique.
print("Step 4: \nApplying the clustering algorithm on the reduced data:")

for algo in clusteringAglos:
    for dataset in transformedDatasets:
        print("applying the aglo "+algo[1]+" on the dataset "+dataset[0])
        # Index 2 holds the reduced training split, index 1 the reduced test split.
        algo[0].fit(dataset[2])
        print("training done")

        Prediction = algo[0].predict(dataset[1])
        print(" writing the data")

        # newline='' is required by the csv module; without it an extra
        # '\r' is written on platforms that use CRLF line endings.
        with open(algo[1]+dataset[0]+'prediction'+Num+'.csv', 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            if isinstance(Prediction[0], np.int32):
                # int32 predictions: everything on a single CSV row.
                writer.writerow(Prediction)
            else:
                # Any other element type: one prediction per CSV row.
                writer.writerows([data] for data in Prediction)