Commit 7311f914 authored by Ngocson's avatar Ngocson

c

parent 7ad33f3e
...@@ -9,7 +9,7 @@ WineTestLabel = genfromtxt('DATA/WineTestLabel.csv', delimiter=',') ...@@ -9,7 +9,7 @@ WineTestLabel = genfromtxt('DATA/WineTestLabel.csv', delimiter=',')
WineTrainData = genfromtxt('DATA/WineTrainData.csv', delimiter=',') WineTrainData = genfromtxt('DATA/WineTrainData.csv', delimiter=',')
WineTrainLabel = genfromtxt('DATA/WineTrainLabel.csv', delimiter=',') WineTrainLabel = genfromtxt('DATA/WineTrainLabel.csv', delimiter=',')
WineDataset = (WineTrainData,WineTrainLabel,WineTestData,WineTestLabel) WineDataset = (5,WineTrainData,WineTrainLabel,WineTestData,WineTestLabel,'Wine')
print("Wine: Loaded") print("Wine: Loaded")
MNISTtrainData = genfromtxt('DATA/MNISTtrainData.csv', delimiter=',') MNISTtrainData = genfromtxt('DATA/MNISTtrainData.csv', delimiter=',')
...@@ -17,12 +17,13 @@ MNISTtrainLabel = genfromtxt('DATA/MNISTtrainLabel.csv', delimiter=',') ...@@ -17,12 +17,13 @@ MNISTtrainLabel = genfromtxt('DATA/MNISTtrainLabel.csv', delimiter=',')
MNISTtestData = genfromtxt('DATA/MNISTtestData.csv', delimiter=',') MNISTtestData = genfromtxt('DATA/MNISTtestData.csv', delimiter=',')
MNISTtestLabel = genfromtxt('DATA/MNISTtestLabel.csv', delimiter=',') MNISTtestLabel = genfromtxt('DATA/MNISTtestLabel.csv', delimiter=',')
MNISTdataset = (MNISTtrainData,MNISTtrainLabel,MNISTtestData,MNISTtestLabel) MNISTdataset = (18,MNISTtrainData,MNISTtrainLabel,MNISTtestData,MNISTtestLabel,'MNIST')
print("MNIST: loaded") print("MNIST: loaded")
N = 81
(trainData,trainLabel,testData,testLabel) = MNISTdataset
(N,trainData,trainLabel,testData,testLabel,name) = WineDataset
Nsearch = 16
#18 for MNIST, #18 for MNIST,
# 8 for WINE # 8 for WINE
...@@ -56,6 +57,8 @@ def scoreEMOverN(trainData,trainLabel,testData,testLabel,N): ...@@ -56,6 +57,8 @@ def scoreEMOverN(trainData,trainLabel,testData,testLabel,N):
print(score) print(score)
scores.append(score) scores.append(score)
return(scores) return(scores)
def findN(trainData,trainLabel,testData,testLabel,N): def findN(trainData,trainLabel,testData,testLabel,N):
sc = np.array(scoreEMOverN(trainData, sc = np.array(scoreEMOverN(trainData,
trainLabel, trainLabel,
...@@ -69,17 +72,16 @@ def findN(trainData,trainLabel,testData,testLabel,N): ...@@ -69,17 +72,16 @@ def findN(trainData,trainLabel,testData,testLabel,N):
plt.plot(range(1,N),tr_score,'b.-',label='training score') plt.plot(range(1,N),tr_score,'b.-',label='training score')
plt.plot(range(1,N),te_score,'r.-',label='testing score') plt.plot(range(1,N),te_score,'r.-',label='testing score')
legend = ax.legend(loc=(0.6,0.1), shadow=True) legend = ax.legend(loc=(0.6,0.1), shadow=True)
plt.savefig("RESULTS/kmeanOverWINE.png") plt.savefig("RESULTS/EMOver"+name+".png")
findN(trainData,trainLabel,testData,testLabel,N)
def showPrototypes(trainData,trainLabel,testData,testLabel,N): def showPrototypes(trainData,trainLabel,testData,testLabel,N):
clf = KMeans(N) clf = GaussianMixture(N)
clf.fit(trainData) clf.fit(trainData)
clf.fit(trainData) clf.fit(trainData)
cluster = np.zeros((N,len(np.unique(trainLabel)))) cluster = np.zeros((N,len(np.unique(trainLabel))))
pred = clf.predict(trainData) pred = clf.predict(trainData)
centroids = clf.cluster_centers_ centroids = clf.means_
for c,l in zip(pred,trainLabel.astype(np.int)): for c,l in zip(pred,trainLabel.astype(np.int)):
cluster[c][l-3] += 1 cluster[c][l-3] += 1
clusterLabel = np.zeros(N) clusterLabel = np.zeros(N)
...@@ -89,18 +91,19 @@ def showPrototypes(trainData,trainLabel,testData,testLabel,N): ...@@ -89,18 +91,19 @@ def showPrototypes(trainData,trainLabel,testData,testLabel,N):
n = np.max(counts) n = np.max(counts)
m = len(np.unique(trainLabel)) m = len(np.unique(trainLabel))
plt.figure("Centroids") print(clusterLabel)
fig = plt.figure("Centroids")
for i in range(m): for i in range(m):
j = 1 j = 1
for c in range(len(clusterLabel)): for c in range(len(clusterLabel)):
if clusterLabel[c] == i: if clusterLabel[c] == i:
plt.subplot(m,n,n*i+j) plt.subplot(m,n,n*i+j)
j+=1 j+=1
print(centroids[c]) if name == 'Wine':
plt.imshow(centroids[c]) plt.imshow([centroids[c] for i in range(10)])
plt.show() else:
'''showPrototypes(WineTrainData, plt.imshow(centroids[c])
WineTrainLabel, plt.savefig("RESULTS/EMcentroids"+name+".png")
WineTestData,
WineTestLabel, findN(trainData,trainLabel,testData,testLabel,Nsearch)
N)''' showPrototypes(trainData,trainLabel,testData,testLabel,N)
\ No newline at end of file \ No newline at end of file
...@@ -8,6 +8,8 @@ WineTestLabel = genfromtxt('DATA/WineTestLabel.csv', delimiter=',') ...@@ -8,6 +8,8 @@ WineTestLabel = genfromtxt('DATA/WineTestLabel.csv', delimiter=',')
WineTrainData = genfromtxt('DATA/WineTrainData.csv', delimiter=',') WineTrainData = genfromtxt('DATA/WineTrainData.csv', delimiter=',')
WineTrainLabel = genfromtxt('DATA/WineTrainLabel.csv', delimiter=',') WineTrainLabel = genfromtxt('DATA/WineTrainLabel.csv', delimiter=',')
WineDataset = (8,WineTrainData,WineTrainLabel,WineTestData,WineTestLabel,'Wine')
print("Wine: Loaded") print("Wine: Loaded")
MNISTtrainData = genfromtxt('DATA/MNISTtrainData.csv', delimiter=',') MNISTtrainData = genfromtxt('DATA/MNISTtrainData.csv', delimiter=',')
...@@ -15,13 +17,17 @@ MNISTtrainLabel = genfromtxt('DATA/MNISTtrainLabel.csv', delimiter=',') ...@@ -15,13 +17,17 @@ MNISTtrainLabel = genfromtxt('DATA/MNISTtrainLabel.csv', delimiter=',')
MNISTtestData = genfromtxt('DATA/MNISTtestData.csv', delimiter=',') MNISTtestData = genfromtxt('DATA/MNISTtestData.csv', delimiter=',')
MNISTtestLabel = genfromtxt('DATA/MNISTtestLabel.csv', delimiter=',') MNISTtestLabel = genfromtxt('DATA/MNISTtestLabel.csv', delimiter=',')
MNISTdataset = (18,MNISTtrainData,MNISTtrainLabel,MNISTtestData,MNISTtestLabel,'MNIST')
print("MNIST: loaded") print("MNIST: loaded")
N = 16
(N,trainData,trainLabel,testData,testLabel,name) = WineDataset
Nsearch = 16
#18 for MNIST, #18 for MNIST,
# 8 for WINE # 8 for WINE
def scoreKmeansOverN(trainData,trainLabel,testData,testLabel,N): def scoreEMOverN(trainData,trainLabel,testData,testLabel,N):
scores = [] scores = []
for n in range(1,N): for n in range(1,N):
...@@ -51,11 +57,13 @@ def scoreKmeansOverN(trainData,trainLabel,testData,testLabel,N): ...@@ -51,11 +57,13 @@ def scoreKmeansOverN(trainData,trainLabel,testData,testLabel,N):
print(score) print(score)
scores.append(score) scores.append(score)
return(scores) return(scores)
def findN():
sc = np.array(scoreKmeansOverN(WineTrainData,
WineTrainLabel, def findN(trainData,trainLabel,testData,testLabel,N):
WineTestData, sc = np.array(scoreEMOverN(trainData,
WineTestLabel, trainLabel,
testData,
testLabel,
N)) N))
tr_score = sc[:,0] tr_score = sc[:,0]
te_score = sc[:,1] te_score = sc[:,1]
...@@ -64,8 +72,7 @@ def findN(): ...@@ -64,8 +72,7 @@ def findN():
plt.plot(range(1,N),tr_score,'b.-',label='training score') plt.plot(range(1,N),tr_score,'b.-',label='training score')
plt.plot(range(1,N),te_score,'r.-',label='testing score') plt.plot(range(1,N),te_score,'r.-',label='testing score')
legend = ax.legend(loc=(0.6,0.1), shadow=True) legend = ax.legend(loc=(0.6,0.1), shadow=True)
plt.savefig("RESULTS/kmeanOverWINE.png") plt.savefig("RESULTS/KMEANOver"+name+".png")
findN()
def showPrototypes(trainData,trainLabel,testData,testLabel,N): def showPrototypes(trainData,trainLabel,testData,testLabel,N):
clf = KMeans(N) clf = KMeans(N)
...@@ -84,18 +91,18 @@ def showPrototypes(trainData,trainLabel,testData,testLabel,N): ...@@ -84,18 +91,18 @@ def showPrototypes(trainData,trainLabel,testData,testLabel,N):
n = np.max(counts) n = np.max(counts)
m = len(np.unique(trainLabel)) m = len(np.unique(trainLabel))
plt.figure("Centroids") fig = plt.figure("Centroids")
for i in range(m): for i in range(m):
j = 1 j = 1
for c in range(len(clusterLabel)): for c in range(len(clusterLabel)):
if clusterLabel[c] == i: if clusterLabel[c] == i:
plt.subplot(m,n,n*i+j) plt.subplot(m,n,n*i+j)
j+=1 j+=1
print(centroids[c]) if name == 'Wine':
plt.imshow(centroids[c]) plt.imshow([centroids[c] for i in range(10)])
plt.show() else:
'''showPrototypes(WineTrainData, plt.imshow(centroids[c])
WineTrainLabel, plt.savefig("RESULTS/Kmeancentroids"+name+".png")
WineTestData,
WineTestLabel, findN(trainData,trainLabel,testData,testLabel,Nsearch)
N)''' showPrototypes(trainData,trainLabel,testData,testLabel,N)
\ No newline at end of file \ No newline at end of file
# -*- coding: utf-8 -*-
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
# ---------------------------------------------------------------------------
# Neural-network baseline: plot test/train accuracy against the number of
# training epochs for every MNIST and Wine dataset variant.
# ---------------------------------------------------------------------------


def _buildClassifier(nFeatures, hiddenUnits, nClasses):
    """Return a compiled tanh MLP ending in a softmax layer.

    nFeatures   -- width of one input sample
    hiddenUnits -- iterable with the size of each hidden Dense layer, in order
    nClasses    -- number of units of the softmax output layer
    """
    # input_shape has to be a tuple; a bare int is rejected by Keras.
    layers = [keras.layers.Flatten(input_shape=(nFeatures,))]
    for units in hiddenUnits:
        layers.append(keras.layers.Dense(units, activation=tf.nn.tanh))
    layers.append(keras.layers.Dense(nClasses, activation=tf.nn.softmax))
    model = keras.Sequential(layers)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model


def _scoreOverEpochs(dataset, hiddenUnits, nClasses, maxEpochs):
    """Train a fresh model for 0..maxEpochs-1 epochs and plot the accuracies.

    dataset is a tuple whose LAST five items are
    (trainData, trainLabel, testData, testLabel, name); any leading items
    (such as the cluster count stored in WineDataset) are ignored.

    Saves the curve to RESULTS/<name>.png and returns
    (testScores, trainScores), one accuracy per epoch count.
    """
    trainData, trainLabel, testData, testLabel, name = dataset[-5:]
    # Take the input width from the data itself so that reduced datasets,
    # which have fewer columns than the raw ones, fit the network too.
    # Assumes trainData is 2-D (samples x features).
    nFeatures = trainData.shape[1]
    testScores = []
    trainScores = []
    for n in range(maxEpochs):
        # A new model is built for every n, so each point on the curve is an
        # independent run trained for exactly n epochs.
        model = _buildClassifier(nFeatures, hiddenUnits, nClasses)
        model.fit(trainData, trainLabel, epochs=n)
        test_loss, test_acc = model.evaluate(testData, testLabel)
        testScores.append(test_acc)
        # Training accuracy must be measured on the training samples.
        train_loss, train_acc = model.evaluate(trainData, trainLabel)
        trainScores.append(train_acc)
    plt.figure()
    plt.plot(testScores,'b.-',label = "testing score")
    plt.plot(trainScores,'r.-',label = "training score")
    plt.legend()  # the labels above are only drawn when a legend exists
    plt.savefig("RESULTS/"+name+".png")
    plt.close()
    return testScores, trainScores


# Raw MNIST data. genfromtxt is reached through the ``np`` alias because this
# script does not import the bare name.
MNISTtrainData = np.genfromtxt('DATA/MNISTtrainData.csv', delimiter=',')
MNISTtrainLabel = np.genfromtxt('DATA/MNISTtrainLabel.csv', delimiter=',')
MNISTtestData = np.genfromtxt('DATA/MNISTtestData.csv', delimiter=',')
MNISTtestLabel = np.genfromtxt('DATA/MNISTtestLabel.csv', delimiter=',')
MNISTdataset = (MNISTtrainData,MNISTtrainLabel,MNISTtestData,MNISTtestLabel,'MNIST')
# NOTE(review): PCAMNISTdataset, ICAMNISTdataset, RPMNISTdataset and
# LDAMNISTdataset are not defined in the visible part of this script; they
# have to be provided elsewhere before this line runs -- confirm.
MNISTdatasets = (MNISTdataset,PCAMNISTdataset,ICAMNISTdataset,RPMNISTdataset,LDAMNISTdataset)

# Raw Wine data.
WineTestData = np.genfromtxt('DATA/WineTestData.csv', delimiter=',')
WineTestLabel = np.genfromtxt('DATA/WineTestLabel.csv', delimiter=',')
WineTrainData = np.genfromtxt('DATA/WineTrainData.csv', delimiter=',')
WineTrainLabel = np.genfromtxt('DATA/WineTrainLabel.csv', delimiter=',')
WineDataset = (8,WineTrainData,WineTrainLabel,WineTestData,WineTestLabel,'Wine')
# NOTE(review): same remark as for the MNIST variants -- the reduced Wine
# datasets are not defined in the visible part of this script.
WineDatasets = (WineDataset,PCAWineDataset,ICAWineDataset,RPWineDataset,LDAWineDataset)

# Number of epoch settings evaluated per dataset (epochs = 0 .. N-1).
N = 50
te_scores = []
tr_scores = []

# MNIST: 128-64-32 hidden units, 10 output classes.
for dataset in MNISTdatasets:
    te_score, tr_score = _scoreOverEpochs(dataset, (128, 64, 32), 10, N)
    te_scores.append(te_score)
    tr_scores.append(tr_score)

# Wine: 64-64-32 hidden units, 7 output classes.
# NOTE(review): sparse_categorical_crossentropy expects labels in
# 0..nClasses-1; verify that the Wine labels are encoded that way.
for dataset in WineDatasets:
    te_score, tr_score = _scoreOverEpochs(dataset, (64, 64, 32), 7, N)
    te_scores.append(te_score)
    tr_scores.append(tr_score)
\ No newline at end of file
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
#from imports import *
import numpy as np
from numpy import genfromtxt
import csv
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.decomposition import FastICA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn import random_projection
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
# ---------------------------------------------------------------------------
# Feature reduction: fit PCA / ICA / random projection / LDA on the training
# split of each dataset and write the projected train and test splits to CSV.
# ---------------------------------------------------------------------------


def _writeRows(path, rows):
    """Write every item of rows as one CSV record to path."""
    # newline='' is what the csv module asks for; without it some platforms
    # emit an extra blank line after every record.
    with open(path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerows(rows)


Num = '_done'        # suffix appended to every output file name
Ncluster = 10        # number of clusters / mixture components
Ncomponent = 2       # target dimensionality of every reduction

pca = PCA(Ncomponent)
ica = FastICA(n_components = Ncomponent,
              tol = 1e-3,
              max_iter = 1000)
rp = random_projection.GaussianRandomProjection(n_components = Ncomponent,
                                                eps = 0.5)
lda = LinearDiscriminantAnalysis(n_components = Ncomponent)
algos = [(pca,"PCA"),(ica,"ICA"),(rp,"RandomProjection"),(lda,"LDA")]

# NOTE(review): newer scikit-learn releases no longer accept n_jobs on
# KMeans -- confirm the installed version before relying on this.
km = KMeans(n_clusters = Ncluster,
            init = 'k-means++',
            n_jobs = -1
            )
em = GaussianMixture(n_components = Ncluster,
                     verbose = 2)
clusteringAglos = [(km,"KMeans"),(em,"ExpectationMaximization")]

print("Step 1:\n Load data:")
WineTestData = genfromtxt('DATA/WineTestData.csv', delimiter=',')
WineTestLabel = genfromtxt('DATA/WineTestLabel.csv', delimiter=',')
WineTrainData = genfromtxt('DATA/WineTrainData.csv', delimiter=',')
WineTrainLabel = genfromtxt('DATA/WineTrainLabel.csv', delimiter=',')
print("Wine: Loaded")
MNISTtrainData = genfromtxt('DATA/MNISTtrainData.csv', delimiter=',')
MNISTtrainLabel = genfromtxt('DATA/MNISTtrainLabel.csv', delimiter=',')
MNISTtestData = genfromtxt('DATA/MNISTtestData.csv', delimiter=',')
MNISTtestLabel = genfromtxt('DATA/MNISTtestLabel.csv', delimiter=',')
print("MNIST: loaded")

# Each entry: (name, testData, trainData, testLabel, trainLabel, count).
# The trailing number is not read anywhere in this script.
datasets = [("MNIST",MNISTtestData,MNISTtrainData,MNISTtestLabel,MNISTtrainLabel,18),
            ("Wine",WineTestData,WineTrainData,WineTestLabel,WineTrainLabel,7)]
transformedDatasets = []

print("Step 2: \nFeature reduction:")
for reducer, algoName in algos:
    for dataset in datasets:
        dataName, testData, trainData, testLabel, trainLabel = dataset[:5]
        print("Fitting the reduction of dimensionality "+algoName+" on the dataset "+dataName)
        # LDA is the only supervised reduction here, so it also needs labels.
        if algoName == "LDA":
            reducer.fit(trainData,trainLabel)
        else:
            reducer.fit(trainData)
        print("done\nTransforming the data")
        # Project the TEST split with the model fitted on the training split.
        transformedDatasetTst = reducer.transform(testData)
        print(" writing the data")
        _writeRows(algoName+'_'+dataName+'_tst'+Num+'.csv', transformedDatasetTst)
        transformedDatasetTr = reducer.transform(trainData)
        print(" writing the data")
        _writeRows(algoName+'_'+dataName+'_tr'+Num+'.csv', transformedDatasetTr)
        transformedDatasets.append((dataName,transformedDatasetTst,transformedDatasetTr,testLabel,trainLabel))
        print("done")
RESULTS/kmeanOverWINE.png

27.4 KB | W: | H:

RESULTS/kmeanOverWINE.png

28 KB | W: | H:

RESULTS/kmeanOverWINE.png
RESULTS/kmeanOverWINE.png
RESULTS/kmeanOverWINE.png
RESULTS/kmeanOverWINE.png
  • 2-up
  • Swipe
  • Onion skin
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment