Ce serveur Gitlab sera éteint le 30 juin 2020, pensez à migrer vos projets vers les serveurs gitlab-research.centralesupelec.fr et gitlab-student.centralesupelec.fr !

KMEANS.py 3.48 KB
Newer Older
Ngocson's avatar
Ngocson committed
1 2 3 4 5 6 7 8 9 10
# -*- coding: utf-8 -*-
from imports import *

print("Step 1:\n Load data:")


def _load_csv(path):
    """Parse the comma-separated file at *path* with ``genfromtxt``."""
    return genfromtxt(path, delimiter=',')


# Wine: the test split is read first, then the training split.
WineTestData, WineTestLabel = (
    _load_csv('DATA/WineTestData.csv'),
    _load_csv('DATA/WineTestLabel.csv'),
)
WineTrainData, WineTrainLabel = (
    _load_csv('DATA/WineTrainData.csv'),
    _load_csv('DATA/WineTrainLabel.csv'),
)

# Dataset tuples share one layout:
# (N, train data, train labels, test data, test labels, display name).
WineDataset = (
    8,
    WineTrainData,
    WineTrainLabel,
    WineTestData,
    WineTestLabel,
    'Wine',
)
print("Wine: Loaded")

# MNIST: the training split is read first, then the test split.
MNISTtrainData, MNISTtrainLabel = (
    _load_csv('DATA/MNISTtrainData.csv'),
    _load_csv('DATA/MNISTtrainLabel.csv'),
)
MNISTtestData, MNISTtestLabel = (
    _load_csv('DATA/MNISTtestData.csv'),
    _load_csv('DATA/MNISTtestLabel.csv'),
)

MNISTdataset = (
    18,
    MNISTtrainData,
    MNISTtrainLabel,
    MNISTtestData,
    MNISTtestLabel,
    'MNIST',
)
print("MNIST: loaded")

# Select the dataset the rest of the script works on. The leading tuple
# entry (8 for Wine, 18 for MNIST) becomes N; swap in MNISTdataset here to
# analyse MNIST instead.
N, trainData, trainLabel, testData, testLabel, name = WineDataset

# Exclusive upper bound of the cluster counts explored by findN.
Nsearch = 16
Ngocson's avatar
Ngocson committed
29

Ngocson's avatar
c  
Ngocson committed
30
def _clusterPurity(pred, labels, n_clusters):
    """Return the share of samples carrying their cluster's majority label.

    Args:
        pred: Cluster index of every sample, each in ``range(n_clusters)``.
        labels: Class label of every sample, aligned with ``pred``.
        n_clusters: Number of rows of the cluster/label contingency table.

    Returns:
        Sum over clusters of the largest per-label count, divided by the
        total number of samples.
    """
    # Labels are truncated to integers and then replaced by their rank among
    # the distinct values, so any label set (e.g. one starting at 3) maps
    # onto valid column indices 0..k-1.
    classes, label_idx = np.unique(
        np.ravel(labels).astype(int), return_inverse=True
    )
    counts = np.zeros((n_clusters, len(classes)))
    # np.add.at accumulates repeated (cluster, label) pairs; plain fancy-index
    # assignment would count each distinct pair only once.
    np.add.at(counts, (pred, label_idx), 1)
    return counts.max(axis=1).sum() / counts.sum()


def scoreEMOverN(trainData,trainLabel,testData,testLabel,N):
    """Score K-means clusterings of increasing size by cluster purity.

    For every ``n`` in ``range(1, N)`` a ``KMeans(n)`` model is fitted on
    ``trainData`` only. The same fitted model then assigns both the training
    and the testing samples to clusters, and the purity of each assignment
    (share of samples carrying the majority label of their cluster) is
    computed. Each ``[train, test]`` pair is printed as soon as it is known.

    Args:
        trainData: Samples used to fit the model and to compute the training
            purity.
        trainLabel: Labels aligned with ``trainData``.
        testData: Samples used only for the testing purity.
        testLabel: Labels aligned with ``testData``.
        N: Exclusive upper bound of the cluster counts tried (``N`` itself is
            not evaluated).

    Returns:
        A list with one ``[train_purity, test_purity]`` entry per cluster
        count, ordered by increasing ``n``; empty when ``N <= 1``.
    """
    scores = []
    for n in range(1, N):
        # NOTE(review): KMeans comes from the project's `imports` module --
        # presumably scikit-learn's, with n as the number of clusters.
        clf = KMeans(n)
        clf.fit(trainData)

        score = [
            _clusterPurity(clf.predict(trainData), trainLabel, n),
            _clusterPurity(clf.predict(testData), testLabel, n),
        ]

        print(score)
        scores.append(score)
    return scores
Ngocson's avatar
c  
Ngocson committed
60 61 62 63 64 65 66
    
    
def findN(trainData,trainLabel,testData,testLabel,N):
    """Plot training and testing purity against the number of clusters.

    Calls ``scoreEMOverN`` for cluster counts ``1 .. N-1``, draws both score
    curves on one axes and saves the figure to
    ``RESULTS/KMEANOver<name>.png``, where ``name`` is the module-level
    dataset name.

    Args:
        trainData: Training samples, forwarded to ``scoreEMOverN``.
        trainLabel: Training labels, forwarded to ``scoreEMOverN``.
        testData: Testing samples, forwarded to ``scoreEMOverN``.
        testLabel: Testing labels, forwarded to ``scoreEMOverN``.
        N: Exclusive upper bound of the cluster counts explored.

    Returns:
        None. The result is the image file written to disk.
    """
    scores = scoreEMOverN(trainData, trainLabel, testData, testLabel, N)
    # reshape(-1, 2) keeps the two-column layout even when no cluster count
    # was evaluated (N <= 1), so the column slices below never fail.
    sc = np.array(scores, dtype=float).reshape(-1, 2)
    tr_score = sc[:, 0]
    te_score = sc[:, 1]

    n_values = range(1, N)
    # Exactly one figure is created, and it is the one that gets saved.
    fig, ax = plt.subplots()
    ax.plot(n_values, tr_score, 'b.-', label='training score')
    ax.plot(n_values, te_score, 'r.-', label='testing score')
    ax.legend(loc=(0.6, 0.1), shadow=True)
    fig.savefig("RESULTS/KMEANOver"+name+".png")
Ngocson's avatar
Ngocson committed
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93

def _centroidAsImage(centroid, as_strip):
    """Turn a 1-D centroid into a 2-D array that ``imshow`` can draw.

    Args:
        centroid: 1-D feature vector of one cluster centre.
        as_strip: When true, always draw the vector as a strip of ten
            identical rows.

    Returns:
        A 2-D array: a ``side x side`` square when ``as_strip`` is false and
        the vector length is a perfect square, otherwise ten stacked copies
        of the vector.
    """
    centroid = np.asarray(centroid)
    if not as_strip:
        side = int(np.sqrt(centroid.size) + 0.5)
        if side * side == centroid.size:
            # assumes a row-major flattened square image -- TODO confirm
            # against the layout of the CSV rows
            return centroid.reshape(side, side)
    return np.tile(centroid, (10, 1))


def showPrototypes(trainData,trainLabel,testData,testLabel,N):
    """Draw the K-means centroids grouped by their majority class.

    Fits ``KMeans(N)`` on ``trainData``, gives every cluster the label that
    is most frequent among the training samples assigned to it, and draws
    one subplot per centroid on a grid with one row per distinct label.
    The figure is saved to ``RESULTS/Kmeancentroids<name>.png``, where
    ``name`` is the module-level dataset name.

    Args:
        trainData: Samples used to fit the model and to label the clusters.
        trainLabel: Labels aligned with ``trainData``.
        testData: Unused; kept so the signature matches the other helpers.
        testLabel: Unused; kept so the signature matches the other helpers.
        N: Number passed to ``KMeans`` and number of centroids drawn.

    Returns:
        None. The result is the image file written to disk.
    """
    clf = KMeans(N)
    clf.fit(trainData)
    pred = clf.predict(trainData)
    centroids = clf.cluster_centers_

    # Replace each label by its rank among the distinct labels so the column
    # index is valid for any label range, not only for labels starting at 3.
    classes, label_idx = np.unique(
        np.ravel(trainLabel).astype(int), return_inverse=True
    )
    m = len(classes)
    cluster = np.zeros((N, m))
    # np.add.at accumulates repeated (cluster, label) pairs correctly.
    np.add.at(cluster, (pred, label_idx), 1)

    # Majority label (as a column index) of every cluster.
    clusterLabel = np.argmax(cluster, axis=1)
    _, counts = np.unique(clusterLabel, return_counts=True)
    # Grid width: the largest number of clusters sharing one label.
    n = int(np.max(counts))

    plt.figure("Centroids")
    for i in range(m):
        members = np.flatnonzero(clusterLabel == i)
        # Row i holds the clusters labelled i, filled from the left;
        # subplot positions are 1-based.
        for j, c in enumerate(members, start=1):
            plt.subplot(m, n, n*i+j)
            plt.imshow(_centroidAsImage(centroids[c], name == 'Wine'))
    plt.savefig("RESULTS/Kmeancentroids"+name+".png")

# Run both analyses on the selected dataset: the purity curve over cluster
# counts below Nsearch, then the centroid gallery with N clusters.
selectedSplit = (trainData, trainLabel, testData, testLabel)
findN(*selectedSplit, Nsearch)
showPrototypes(*selectedSplit, N)