Commit 7ad33f3e authored by Ngocson

Starting EM

parent e310bc8f
# -*- coding: utf-8 -*-
from imports import *  # expected to provide: numpy as np, genfromtxt, matplotlib.pyplot as plt, GaussianMixture, KMeans
print("Step 1:\n Load data:")
WineTestData = genfromtxt('DATA/WineTestData.csv', delimiter=',')
WineTestLabel = genfromtxt('DATA/WineTestLabel.csv', delimiter=',')
WineTrainData = genfromtxt('DATA/WineTrainData.csv', delimiter=',')
WineTrainLabel = genfromtxt('DATA/WineTrainLabel.csv', delimiter=',')
WineDataset = (WineTrainData,WineTrainLabel,WineTestData,WineTestLabel)
print("Wine: Loaded")
MNISTtrainData = genfromtxt('DATA/MNISTtrainData.csv', delimiter=',')
MNISTtrainLabel = genfromtxt('DATA/MNISTtrainLabel.csv', delimiter=',')
MNISTtestData = genfromtxt('DATA/MNISTtestData.csv', delimiter=',')
MNISTtestLabel = genfromtxt('DATA/MNISTtestLabel.csv', delimiter=',')
MNISTdataset = (MNISTtrainData,MNISTtrainLabel,MNISTtestData,MNISTtestLabel)
print("MNIST: loaded")
N = 81
(trainData,trainLabel,testData,testLabel) = MNISTdataset
# 18 for MNIST, 8 for WINE
def scoreEMOverN(trainData, trainLabel, testData, testLabel, N):
    """Fit a Gaussian mixture with n components for n = 1..N-1 and record how
    well the clusters match the labels on the training and test sets."""
    scores = []
    for n in range(1, N):
        score = [0, 0]
        clf = GaussianMixture(n_components=n)
        clf.fit(trainData)
        # count how many points of each label fall into each cluster
        cluster = np.zeros((n, len(np.unique(trainLabel))))
        pred = clf.predict(trainData)
        for c, l in zip(pred, trainLabel.astype(int)):
            cluster[c][l] += 1
        # training score: fraction of points whose label is the majority label
        # of their cluster (cluster purity)
        for c in cluster:
            score[0] += np.max(c)
        score[0] = score[0] / np.sum(cluster)
        # same purity measure on the test set
        cluster = np.zeros((n, len(np.unique(testLabel))))
        pred = clf.predict(testData)
        for c, l in zip(pred, testLabel.astype(int)):
            cluster[c][l] += 1
        for c in cluster:
            score[1] += np.max(c)
        score[1] = score[1] / np.sum(cluster)
        print(score)
        scores.append(score)
    return scores
def findN(trainData, trainLabel, testData, testLabel, N):
    """Plot training/testing cluster-purity scores against the number of
    mixture components and save the figure."""
    sc = np.array(scoreEMOverN(trainData,
                               trainLabel,
                               testData,
                               testLabel,
                               N))
    tr_score = sc[:, 0]
    te_score = sc[:, 1]
    fig, ax = plt.subplots(num='EM over MNIST')
    plt.plot(range(1, N), tr_score, 'b.-', label='training score')
    plt.plot(range(1, N), te_score, 'r.-', label='testing score')
    ax.legend(loc=(0.6, 0.1), shadow=True)
    plt.savefig("RESULTS/EMoverMNIST.png")

findN(trainData, trainLabel, testData, testLabel, N)
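# Minimal usage sketch (an assumption, not part of this commit's run): the same
# scan on the Wine split, taking 8 components from the "# 8 for WINE" comment
# above. Note findN would overwrite the same output file unless the savefig
# path is changed.
# findN(WineTrainData, WineTrainLabel, WineTestData, WineTestLabel, 8)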
def showPrototypes(trainData, trainLabel, testData, testLabel, N):
    """Fit KMeans with N clusters, assign each cluster the majority label of
    its members, and display the centroids grouped by that label."""
    clf = KMeans(n_clusters=N)
    clf.fit(trainData)
    cluster = np.zeros((N, len(np.unique(trainLabel))))
    pred = clf.predict(trainData)
    centroids = clf.cluster_centers_
    # labels are shifted by 3 before indexing (assumes the smallest label is 3,
    # as in the Wine split used below)
    for c, l in zip(pred, trainLabel.astype(int)):
        cluster[c][l - 3] += 1
    # majority label of each cluster
    clusterLabel = np.zeros(N)
    for c in range(len(cluster)):
        clusterLabel[c] = np.argmax(cluster[c])
    _, counts = np.unique(clusterLabel, return_counts=True)
    n = int(np.max(counts))          # most clusters sharing one label -> grid width
    m = len(np.unique(trainLabel))   # number of distinct labels -> grid height
    plt.figure("Centroids")
    for i in range(m):
        j = 1
        for c in range(len(clusterLabel)):
            if clusterLabel[c] == i:
                plt.subplot(m, n, n * i + j)
                j += 1
                print(centroids[c])
                # centroids are flat vectors; imshow needs a 2-D array, so
                # reshape to a square image when possible, else a single row
                side = int(np.sqrt(centroids[c].size))
                if side * side == centroids[c].size:
                    plt.imshow(centroids[c].reshape(side, side))
                else:
                    plt.imshow(centroids[c].reshape(1, -1), aspect='auto')
    plt.show()
'''showPrototypes(WineTrainData,
                  WineTrainLabel,
                  WineTestData,
                  WineTestLabel,
                  N)'''
@@ -19,7 +19,7 @@ print("MNIST: loaded")
 N = 16
 #18 for MNIST,
-# 7 for WINE
+# 8 for WINE
 def scoreKmeansOverN(trainData,trainLabel,testData,testLabel,N):