Ce serveur Gitlab sera éteint le 30 juin 2020, pensez à migrer vos projets vers les serveurs gitlab-research.centralesupelec.fr et gitlab-student.centralesupelec.fr !

Commit e310bc8f authored by Ngocson's avatar Ngocson

modifying Wine Preprocessing

parent 739f67f4
This diff is collapsed.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
......@@ -18,6 +18,8 @@ MNISTtestLabel = genfromtxt('DATA/MNISTtestLabel.csv', delimiter=',')
print("MNIST: loaded")
N = 16
#18 for MNIST,
# 7 for WINE
def scoreKmeansOverN(trainData,trainLabel,testData,testLabel,N):
......@@ -31,7 +33,7 @@ def scoreKmeansOverN(trainData,trainLabel,testData,testLabel,N):
pred = clf.predict(trainData)
for c,l in zip(pred,trainLabel.astype(np.int)):
cluster[c][l-3] += 1
cluster[c][l] += 1
for c in cluster:
score[0] += np.max(c)
......@@ -40,7 +42,7 @@ def scoreKmeansOverN(trainData,trainLabel,testData,testLabel,N):
cluster = np.zeros((n,len(np.unique(testLabel))))
pred = clf.predict(testData)
for c,l in zip(pred,testLabel.astype(np.int)):
cluster[c][l-3] += 1
cluster[c][l] += 1
for c in cluster:
score[1] += np.max(c)
......@@ -49,17 +51,51 @@ def scoreKmeansOverN(trainData,trainLabel,testData,testLabel,N):
print(score)
scores.append(score)
return(scores)
sc = np.array(scoreKmeansOverN(WineTrainData,
def findN():
sc = np.array(scoreKmeansOverN(WineTrainData,
WineTrainLabel,
WineTestData,
WineTestLabel,
N))
tr_score = sc[:,0]
te_score = sc[:,1]
fig = plt.figure('Kmean over WINE')
_, ax = plt.subplots()
plt.plot(range(1,N),tr_score,'b.-',label='training score')
plt.plot(range(1,N),te_score,'r.-',label='testing score')
legend = ax.legend(loc=(0.6,0.1), shadow=True)
plt.savefig("RESULTS/kmeanOverWINE.png")
\ No newline at end of file
tr_score = sc[:,0]
te_score = sc[:,1]
fig = plt.figure('Kmean over WINE')
_, ax = plt.subplots()
plt.plot(range(1,N),tr_score,'b.-',label='training score')
plt.plot(range(1,N),te_score,'r.-',label='testing score')
legend = ax.legend(loc=(0.6,0.1), shadow=True)
plt.savefig("RESULTS/kmeanOverWINE.png")
findN()
def showPrototypes(trainData,trainLabel,testData,testLabel,N):
    """Fit K-means with N clusters on trainData and plot the centroids by class.

    Each cluster is assigned the class that is most frequent among the
    training samples it contains. The centroids are then drawn in a grid with
    one row per distinct class, each row holding the centroids of the clusters
    assigned to that class. Every centroid is also printed.

    Parameters
    ----------
    trainData : 2-D array passed to ``KMeans.fit`` / ``KMeans.predict``.
    trainLabel : 1-D array of class labels, one per row of ``trainData``.
    testData, testLabel : unused; kept so the signature stays parallel with
        ``scoreKmeansOverN``.
    N : number of clusters.
    """
    clf = KMeans(n_clusters=N)
    clf.fit(trainData)  # a single fit is enough; the original fitted twice
    pred = clf.predict(trainData)
    centroids = clf.cluster_centers_

    # Map each label to its rank among the distinct labels instead of
    # subtracting a hard-coded offset of 3. This gives the same column for
    # contiguous labels starting at 3 and stays correct when the caller has
    # already shifted the labels to start at 0 (where `l-3` would wrap around
    # to the wrong column through negative indexing).
    classes, labelIdx = np.unique(trainLabel, return_inverse=True)
    labelIdx = np.ravel(labelIdx)
    m = len(classes)

    # cluster[c][k] = number of training samples of class k in cluster c.
    cluster = np.zeros((N, m))
    np.add.at(cluster, (pred, labelIdx), 1)

    # Majority class (as a column index into `classes`) of every cluster.
    clusterLabel = np.argmax(cluster, axis=1)

    # Grid width: the largest number of clusters sharing one majority class.
    n = int(np.max(np.bincount(clusterLabel, minlength=m)))

    plt.figure("Centroids")
    for i in range(m):
        members = np.flatnonzero(clusterLabel == i)
        for j, c in enumerate(members, start=1):
            plt.subplot(m, n, n*i + j)
            print(centroids[c])
            # A centroid is a 1-D feature vector, which imshow rejects;
            # show it as a single-row image.
            plt.imshow(np.atleast_2d(centroids[c]))
    plt.show()
'''showPrototypes(WineTrainData,
WineTrainLabel,
WineTestData,
WineTestLabel,
N)'''
\ No newline at end of file
RESULTS/kmeanOverWINE.png

21 KB | W: | H:

RESULTS/kmeanOverWINE.png

27.4 KB | W: | H:

RESULTS/kmeanOverWINE.png
RESULTS/kmeanOverWINE.png
RESULTS/kmeanOverWINE.png
RESULTS/kmeanOverWINE.png
  • 2-up
  • Swipe
  • Onion skin
......@@ -10,7 +10,7 @@ size_train_wine = 5000
print("Step 1:\nFeature reduction:\n Load data:")
WineData = genfromtxt('DATA/WineFormated.csv', delimiter=',')
WineData = (genfromtxt('DATA/WineFormated.csv', delimiter=','))
print("WineData : Loaded")
MNISTtestData = genfromtxt('DATA/MNISTtest.txt', delimiter=',')
print("MNISTtestData : Loaded")
......@@ -18,14 +18,21 @@ MNISTtrainData = genfromtxt('DATA/MNISTtrain.txt', delimiter=',')
print("MNISTtrainData : Loaded")
print("done")
WineTest = WineData[1:size_train_wine+1]
WineTestLabel = WineTest[:,0]
WineTest = WineData[1:size_train_wine+1].astype(np.float)
np.random.shuffle(WineTest)
WineTestLabel = WineTest[:,0]-3
WineTestData = WineTest[:,1:]
WineTrain = WineData[size_train_wine+1:]
WineTrainLabel = WineTrain[:,0]
WineTrain = WineData[size_train_wine+1:].astype(np.float)
WineTrainLabel = WineTrain[:,0]-3
WineTrainData = WineTrain[:,1:]
for c in range(2,WineTestData.shape[1]):
WineTestData[:,c] = (WineTestData[:,c]- WineTestData[:,c].mean())/( WineTestData[:,c].std())
for c in range(2,WineTrainData.shape[1]):
WineTrainData[:,c] = (WineTrainData[:,c]- WineTrainData[:,c].mean())/( WineTrainData[:,c].std())
MNISTtrain = MNISTtrainData[1:size_train_mnist+1]
MNISTtrainLabel = MNISTtrain[:,0]
MNISTtrainData = MNISTtrain[:,1:]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment