Commit 3c77d010 authored by Hachemin Pierre-Yves

Replace Data_Vis_Cleaning.py

parent ea7499e9
import ast
import os
import cv2
import requests
import matplotlib.pyplot as plt
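# Data_Vis_Cleaning.py
# Builds one-hot genre labels from the per-year link dictionaries, attaches the
# corresponding spectrogram images to those labels, and plots the number of
# movies per genre as a bar chart.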
#L = ['Sithad108Og.','fBNpSRtfIUA', '2e-eXJ6HgkQ', 'tN1A2mVnrOM', 'myTaigPrbsg', 'b75lZw8nkvo']
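# TMDB numeric genre ids (e.g. 28 = Action, 35 = Comedy, 18 = Drama), used as the
# one-hot label columns below.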
genres = [28, 35, 18, 99, 10749, 10752, 10402, 53, 878, 27, 9648, 80, 14, 12, 36, 10769, 16, 10751, 37, 10770]
list_of_eligible_spectrums = []
for file in os.listdir("SpectrumImages2005"):
if str(file)[-4:] == '.jpg':
list_of_eligible_spectrums += [file]
print(len(list_of_eligible_spectrums))
def get_genre_from_link():
    # Parse the 2005 link dictionary; index 1 of each entry holds the genre-id
    # list and index 2 the YouTube link of the movie.
    path = "./Link-dictionaries/Link-dictionary2005.txt"
    with open(path, "r") as f:
        dictyear = ast.literal_eval(f.read())
    dict_inverse = {}
    links_to_be_removed = []
    for movie_id in dictyear.keys():
        if dictyear[movie_id][1] != []:
            # One-hot encode the movie's genres, keyed by its YouTube link
            dict_inverse[str(dictyear[movie_id][2])] = {}
            for genre in genres:
                if genre in dictyear[movie_id][1]:
                    dict_inverse[str(dictyear[movie_id][2])][genre] = 1
                else:
                    dict_inverse[str(dictyear[movie_id][2])][genre] = 0
        else:
            # Movies without any genre annotation are removed from the list later
            links_to_be_removed += [dictyear[movie_id][2]]
    return dict_inverse, links_to_be_removed
years = range(2000, 2018)
def get_output_list(L):
    # Keep only the spectrogram files whose YouTube link carries genre labels
    dict_inverse, links_to_be_removed = get_genre_from_link()
    eligible_links = []
    for link in L:
        link = str(link)
        if link[-1] == ".":
            print("do something! Too many points.......")
        if link[:-4] not in links_to_be_removed:
            # Strip the '.jpg' extension to recover the YouTube link
            eligible_links += [link[:-4]]
    return dict_inverse, eligible_links
dict_inverse, eligible_links = get_output_list(list_of_eligible_spectrums)
# Attach the first pixel row of each spectrogram, reshaped to (width, 3), to its
# genre dictionary entry.
for file in eligible_links:
    img = cv2.imread('SpectrumImages2005/' + file + '.jpg', 1)
    img = img[0:1]
    img = img.reshape((img.shape[1], img.shape[2]))
    dict_inverse[file]['image'] = [img]  # key does not exist yet, so assign rather than append
    print('coucou')  # debug marker
# Collect the spectrogram paths for every year in the study period
list_of_eligible_spectrums = []
for year in years:
    for file in os.listdir("./spectrumImages/SpectrumImages" + str(year)):
        if str(file)[-4:] == '.jpg':
            list_of_eligible_spectrums += ['SpectrumImages' + str(year) + '/' + file]
url = "https://api.themoviedb.org/3/genre/movie/list?"
API = 'api_key=a9075982d1f7ce05cc45adec0e5f5358&language=en-US'
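# Illustrative sketch, not part of the original file (whose TMDB helpers sit in
# the elided hunk below): the genre-list endpoint defined above can be queried
# with `requests` to map the numeric genre ids to their names. The helper name
# `fetch_genre_names` is hypothetical; it assumes the standard TMDB response
# shape {"genres": [{"id": 28, "name": "Action"}, ...]}.
def fetch_genre_names():
    response = requests.get(url + API).json()
    return {genre['id']: genre['name'] for genre in response['genres']}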
@@ -128,21 +48,12 @@ def sum_genre(list_years):
            final_dict_genre[key] = dict_genre[key]
    return final_dict_genre
# Aggregate the per-genre movie counts (stored under a separate name so the function is not shadowed)
genre_counts = sum_genre(range(2000, 2018))
# print(get_genres(2001).keys())
# #print(get_genres(2002).keys())
# print(get_genres(2003).keys())
# print(get_genres(2004).keys())
# print(get_genres(2005).keys())
#print(translate_genre())
genre_counts = sum_genre(range(2003, 2014))
# Bar chart of the number of movies per genre over the selected years
L = []
for key in genre_counts.keys():
    L += [str(key)]
print(L, list(genre_counts.values()))
print(sum(list(genre_counts.values())))
plt.bar(L, list(genre_counts.values()))
plt.show()
\ No newline at end of file