Ce serveur Gitlab sera éteint le 30 juin 2020, pensez à migrer vos projets vers les serveurs gitlab-research.centralesupelec.fr et gitlab-student.centralesupelec.fr !

Commit 9eec0a39 authored by Hachemin Pierre-Yves's avatar Hachemin Pierre-Yves

Cluster and data vis

parent 4146b653
This diff is collapsed.
import ast import ast
import os import os
import cv2
import requests import requests
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
#L = ['Sithad108Og.','fBNpSRtfIUA', '2e-eXJ6HgkQ', 'tN1A2mVnrOM', 'myTaigPrbsg', 'b75lZw8nkvo']
genres = [28, 35, 18, 99, 10749, 10752, 10402, 53, 878, 27, 9648, 80, 14, 12, 36, 10769, 16, 10751, 37, 10770] genres = [28, 35, 18, 99, 10749, 10752, 10402, 53, 878, 27, 9648, 80, 14, 12, 36, 10769, 16, 10751, 37, 10770]
list_of_eligible_spectrums = [] years = range(2000, 2018)
# Collect the names of the '.jpg' entries of the "SpectrumImages2005" directory
# into list_of_eligible_spectrums, then print how many names the list holds.
# NOTE(review): indentation was lost in this copy of the script; the `if` is
# presumably the body of the `for` and the `+=` the body of the `if` -- restore
# the indentation (and confirm where the print belongs) before running.
for file in os.listdir("SpectrumImages2005"):
# Keep only names whose last four characters are exactly '.jpg'.
if str(file)[-4:] == '.jpg':
list_of_eligible_spectrums += [file]
print(len(list_of_eligible_spectrums))
# def get_genre_from_link():
# path = "./Link-dictionaries/Link-dictionary2005.txt"
# file = open(path, "r").read()
# dictyear = ast.literal_eval(file)
# dict_inverse = {}
# links_to_be_removed = []
# for movie_id in dictyear.keys():
# if dictyear[movie_id][1] != []:
# dict_inverse[str(dictyear[movie_id][2])] = {}
# for genre in genres:
# if genre in dictyear[movie_id][1]:
# dict_inverse[str(dictyear[movie_id][2])][genre] = 1
# else:
# dict_inverse[str(dictyear[movie_id][2])][genre] = 0
# else:
# #print(f'careful, link {dictyear[movie_id][2]} needs to be removed from the list')
# links_to_be_removed += [dictyear[movie_id][2]]
# return dict_inverse, links_to_be_removed
#
#
# def get_output_list(L):
# dict_inverse, links_to_be_removed = get_genre_from_link()
# eligible_links = []
# output = []
# for link in L:
# link = str(link)
# #print(dict_inverse[str(link)])
# if link[-1] == ".":
# print("do something! Too many points.......")
# if link[:-4] not in links_to_be_removed:
# output += [dict_inverse[link[:-4]]]
# eligible_links += [link]
# return output, eligible_links
# labels, eligible_links = get_output_list(list_of_eligible_spectrums)
def get_genre_from_link(path="./Link-dictionaries/Link-dictionary2005.txt",
                        genre_ids=None):
    """Build a per-link genre indicator table from a link-dictionary file.

    The file at *path* must contain a Python dict literal. Every value of
    that dict is indexed positionally: item ``[1]`` holds the genre ids of
    the entry and item ``[2]`` holds its link.

    Args:
        path: Location of the dictionary file. Defaults to the 2005 file
            that used to be hard-coded in the body.
        genre_ids: Genre ids to create indicator entries for. ``None``
            (the default) falls back to the module-level ``genres`` list.

    Returns:
        A tuple ``(dict_inverse, links_to_be_removed)``.
        ``dict_inverse`` maps ``str(link)`` to a ``{genre_id: 0 or 1}``
        dict (1 when the entry lists that genre). ``links_to_be_removed``
        contains the links, not stringified, of entries whose genre list
        equals ``[]``.
    """
    if genre_ids is None:
        genre_ids = genres

    # The context manager closes the handle; the previous bare open() leaked it.
    with open(path, "r") as handle:
        # literal_eval only accepts Python literals, so the file cannot run code.
        dictyear = ast.literal_eval(handle.read())

    dict_inverse = {}
    links_to_be_removed = []
    for entry in dictyear.values():
        movie_genres = entry[1]
        link = entry[2]
        if movie_genres != []:
            dict_inverse[str(link)] = {
                genre: 1 if genre in movie_genres else 0
                for genre in genre_ids
            }
        else:
            # No genre information: callers drop these links.
            links_to_be_removed.append(link)
    return dict_inverse, links_to_be_removed
def get_output_list(L):
    """Reduce spectrum file names to the links that have genre data.

    Args:
        L: Iterable of file names. Each one is converted with ``str`` and
            its last four characters are dropped to obtain the link.

    Returns:
        A tuple ``(dict_inverse, eligible_links)``: the table returned by
        ``get_genre_from_link()`` and the list of stripped names that are
        not in its removal list.
    """
    dict_inverse, links_to_be_removed = get_genre_from_link()
    eligible_links = []
    for link in L:
        link = str(link)
        # endswith() rather than link[-1], so an empty name cannot raise
        # IndexError.
        if link.endswith("."):
            print("do something! Too many points.......")
        stem = link[:-4]
        if stem not in links_to_be_removed:
            eligible_links.append(stem)
    return dict_inverse, eligible_links


# NOTE(review): the statements below were fused onto the first lines of the
# function above in this copy (second column of a side-by-side diff). They
# are kept verbatim, with indentation restored: they rebuild
# list_of_eligible_spectrums from one directory per year.
# NOTE(review): the directory listed is "./spectrumImages/SpectrumImages<year>"
# while the stored name starts with "SpectrumImages<year>/" -- confirm that
# consumers expect that relative form.
list_of_eligible_spectrums = []
for year in years:
    for file in os.listdir("./spectrumImages/SpectrumImages" + str(year)):
        if str(file)[-4:] == '.jpg':
            list_of_eligible_spectrums += ['SpectrumImages'+ str(year) +'/' + file]
# Load every eligible spectrum image and attach its first pixel row to the
# matching entry of dict_inverse.
# NOTE(review): indentation was lost in this copy of the script; the four
# lines after the `for` are presumably its body -- confirm whether the final
# print belongs inside the loop or after it.
dict_inverse, eligible_links = get_output_list(list_of_eligible_spectrums)
for file in eligible_links:
# Second argument 1 requests a colour image from OpenCV.
# NOTE(review): imread presumably yields None for an unreadable path, which
# would make the slicing below fail -- confirm and guard if needed.
img = cv2.imread('SpectrumImages2005/' + file + '.jpg', 1)
# Keep only the first row of the image ...
img = img[0:1]
# ... and drop the leading length-1 axis left by that slice.
img = img.reshape((img.shape[1], img.shape[2]))
# NOTE(review): no visible code creates an 'image' key in the per-link dicts
# (get_genre_from_link only stores genre ids), so this `+=` looks like it
# raises KeyError -- confirm, e.g. initialise the key with an empty list first.
dict_inverse[file]['image'] += [img]
print('coucocu')
url = "https://api.themoviedb.org/3/genre/movie/list?" url = "https://api.themoviedb.org/3/genre/movie/list?"
API = 'api_key=a9075982d1f7ce05cc45adec0e5f5358&language=en-US' API = 'api_key=a9075982d1f7ce05cc45adec0e5f5358&language=en-US'
...@@ -128,21 +48,12 @@ def sum_genre(list_years): ...@@ -128,21 +48,12 @@ def sum_genre(list_years):
final_dict_genre[key] = dict_genre[key] final_dict_genre[key] = dict_genre[key]
return final_dict_genre return final_dict_genre
sum_genre = sum_genre(range(2000, 2018))
# print(get_genres(2001).keys())
# #print(get_genres(2002).keys())
# print(get_genres(2003).keys())
# print(get_genres(2004).keys())
# print(get_genres(2005).keys())
#print(translate_genre())
sum_genre = sum_genre(range(2003, 2014))
L = [] L = []
for key in sum_genre.keys(): for key in sum_genre.keys():
L += [str(key)] L += [str(key)]
print(L, list(sum_genre.values())) print(L, list(sum_genre.values()))
print(sum(list(sum_genre.values()))) print(sum(list(sum_genre.values())))
plt.bar(L, list(sum_genre.values())) plt.bar(L, list(sum_genre.values()))
plt.show()
plt.show() \ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment