Ce serveur Gitlab sera éteint le 30 juin 2020, pensez à migrer vos projets vers les serveurs gitlab-research.centralesupelec.fr et gitlab-student.centralesupelec.fr !

Data_Vis_Cleaning.py 4.84 KB
Newer Older
SoleneDc's avatar
SoleneDc committed
1
import ast
SoleneDc's avatar
SoleneDc committed
2
import os
3
import cv2
Hachemin Pierre-Yves's avatar
Hachemin Pierre-Yves committed
4 5
import requests
import matplotlib.pyplot as plt
SoleneDc's avatar
SoleneDc committed
6 7 8

#L = ['Sithad108Og.','fBNpSRtfIUA', '2e-eXJ6HgkQ', 'tN1A2mVnrOM', 'myTaigPrbsg', 'b75lZw8nkvo']

9 10 11 12 13 14 15
# Genre ids used as label columns (presumably TMDB genre ids -- confirm).
genres = [28, 35, 18, 99, 10749, 10752, 10402, 53, 878, 27, 9648, 80, 14, 12, 36, 10769, 16, 10751, 37, 10770]

# Keep only the '.jpg' entries found in the spectrum image directory.
list_of_eligible_spectrums = []
for file in os.listdir("SpectrumImages2005"):
    if not str(file).endswith('.jpg'):
        continue
    list_of_eligible_spectrums.append(file)
print(len(list_of_eligible_spectrums))
SoleneDc's avatar
SoleneDc committed
16

SoleneDc's avatar
SoleneDc committed
17
# def get_genre_from_link():
18
#     path = "./Link-dictionaries/Link-dictionary2005.txt"
SoleneDc's avatar
SoleneDc committed
19 20 21 22 23 24
#     file = open(path, "r").read()
#     dictyear = ast.literal_eval(file)
#     dict_inverse = {}
#     links_to_be_removed = []
#     for movie_id in dictyear.keys():
#         if dictyear[movie_id][1] != []:
25 26 27 28 29 30
#             dict_inverse[str(dictyear[movie_id][2])] = {}
#             for genre in genres:
#                 if genre in dictyear[movie_id][1]:
#                     dict_inverse[str(dictyear[movie_id][2])][genre] = 1
#                 else:
#                     dict_inverse[str(dictyear[movie_id][2])][genre] = 0
SoleneDc's avatar
SoleneDc committed
31
#         else:
32
#             #print(f'careful, link {dictyear[movie_id][2]} needs to be removed from the list')
SoleneDc's avatar
SoleneDc committed
33 34 35 36 37 38
#             links_to_be_removed += [dictyear[movie_id][2]]
#     return dict_inverse, links_to_be_removed
#
#
# def get_output_list(L):
#     dict_inverse, links_to_be_removed = get_genre_from_link()
39
#     eligible_links = []
SoleneDc's avatar
SoleneDc committed
40 41
#     output = []
#     for link in L:
42 43 44 45 46 47 48 49
#         link = str(link)
#         #print(dict_inverse[str(link)])
#         if link[-1] == ".":
#             print("do something! Too many points.......")
#         if link[:-4] not in links_to_be_removed:
#             output += [dict_inverse[link[:-4]]]
#             eligible_links += [link]
#     return output, eligible_links
SoleneDc's avatar
SoleneDc committed
50

51 52

# labels, eligible_links = get_output_list(list_of_eligible_spectrums)
SoleneDc's avatar
SoleneDc committed
53

SoleneDc's avatar
SoleneDc committed
54
def get_genre_from_link(path="./Link-dictionaries/Link-dictionary2005.txt"):
    """Build a per-link genre indicator table from a link-dictionary file.

    The file at ``path`` is parsed with ``ast.literal_eval`` and must contain
    a dict literal. Each value is indexed at position 1 (a collection of
    genre ids) and position 2 (the link identifier).

    Args:
        path: Location of the link-dictionary file. Defaults to the 2005
            dictionary, which is what the function originally hard-coded.

    Returns:
        A tuple ``(dict_inverse, links_to_be_removed)`` where
        ``dict_inverse`` maps ``str(link)`` to a dict ``{genre: 0 or 1}``
        with one key per entry of the module-level ``genres`` list, and
        ``links_to_be_removed`` lists the links whose genre list is empty.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(path, "r") as handle:
        dictyear = ast.literal_eval(handle.read())

    dict_inverse = {}
    links_to_be_removed = []
    for entry in dictyear.values():
        genre_ids = entry[1]
        link = entry[2]
        if genre_ids != []:
            # One indicator per known genre: 1 if the movie has it, else 0.
            dict_inverse[str(link)] = {
                genre: 1 if genre in genre_ids else 0 for genre in genres
            }
        else:
            # No genre information: the link cannot be labelled.
            links_to_be_removed.append(link)
    return dict_inverse, links_to_be_removed

def get_output_list(L):
    """Select the links from ``L`` that were not flagged for removal.

    Args:
        L: Iterable of file names. Each name has its last four characters
            stripped (the caller passes names ending in '.jpg') to obtain
            the link identifier.

    Returns:
        A tuple ``(dict_inverse, eligible_links)``: the genre table returned
        by ``get_genre_from_link()`` and the list of stripped names that are
        not in its ``links_to_be_removed`` list.
    """
    dict_inverse, links_to_be_removed = get_genre_from_link()
    eligible_links = []
    for link in L:
        link = str(link)
        # endswith() is safe on an empty name, unlike link[-1].
        if link.endswith("."):
            print("do something! Too many points.......")
        stem = link[:-4]
        if stem not in links_to_be_removed:
            eligible_links.append(stem)
    return dict_inverse, eligible_links


dict_inverse, eligible_links = get_output_list(list_of_eligible_spectrums)

# Attach the first pixel row of every eligible spectrum image to its entry.
for file in eligible_links:
    entry = dict_inverse.get(file)
    if entry is None:
        # The image has no matching link in the dictionary: nothing to label.
        print(f'no genre entry for {file}, skipping')
        continue
    img = cv2.imread('SpectrumImages2005/' + file + '.jpg', 1)
    if img is None:
        # cv2.imread returns None when the file is missing or unreadable.
        print(f'could not read image for {file}, skipping')
        continue
    # Keep only the first row and drop the leading axis: (1, W, C) -> (W, C).
    img = img[0:1]
    img = img.reshape((img.shape[1], img.shape[2]))
    # The 'image' key is never created elsewhere, so create it on first use.
    entry.setdefault('image', []).append(img)

print('coucocu')
Hachemin Pierre-Yves's avatar
Hachemin Pierre-Yves committed
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148

# Endpoint queried by translate_genre() for the genre list.
url = "https://api.themoviedb.org/3/genre/movie/list?"
# NOTE(review): an API key is committed in source. It is kept as the fallback
# so existing runs behave the same, but it can now be overridden through the
# TMDB_API_KEY environment variable; consider rotating the committed key.
API = (
    'api_key='
    + os.environ.get('TMDB_API_KEY', 'a9075982d1f7ce05cc45adec0e5f5358')
    + '&language=en-US'
)

def get_genres(year):
    """Count how many movies carry each genre id in one year's dictionary.

    Reads ``./Link-dictionaries/Link-dictionary<year>.txt``, parses it with
    ``ast.literal_eval`` (the file must contain a dict literal) and tallies
    the genre ids found at index 1 of every value.

    Args:
        year: Value interpolated into the file name via ``str(year)``.

    Returns:
        A dict mapping each genre id to its number of occurrences.
    """
    path = "./Link-dictionaries/Link-dictionary" + str(year) + ".txt"
    # Use a context manager so the file handle is closed deterministically.
    with open(path, "r") as handle:
        dictyear = ast.literal_eval(handle.read())

    dict_genres = {}
    for entry in dictyear.values():
        for genre_id in entry[1]:
            dict_genres[genre_id] = dict_genres.get(genre_id, 0) + 1
    return dict_genres

def translate_genre():
    """Fetch the genre list from the configured endpoint and print it.

    The request URL is the module-level ``url`` concatenated with ``API``.
    The ``"genres"`` field of the JSON response is printed; nothing is
    returned.

    Raises:
        requests.RequestException: If the request times out or the server
            answers with an HTTP error status.
    """
    url_final = url + API
    # A timeout keeps the script from hanging forever on a stalled connection.
    req = requests.get(url_final, timeout=10)
    # Fail with a clear HTTP error rather than a KeyError on an error payload.
    req.raise_for_status()
    print(req.json()["genres"])

def sum_genre(list_years):
    """Accumulate the per-year genre counts over several years.

    Args:
        list_years: Iterable of years; each one is passed to ``get_genres``.

    Returns:
        A dict mapping each genre id to its count summed over all years.
    """
    totals = {}
    for year in list_years:
        print(f'dealing with {year}...')
        for genre_id, count in get_genres(year).items():
            totals[genre_id] = totals.get(genre_id, 0) + count
    return totals



# print(get_genres(2001).keys())
# #print(get_genres(2002).keys())
# print(get_genres(2003).keys())
# print(get_genres(2004).keys())
# print(get_genres(2005).keys())
#print(translate_genre())
# Store the result under its own name: rebinding `sum_genre` would replace
# the function with a dict and make it uncallable afterwards.
genre_totals = sum_genre(range(2003, 2014))
# Bar labels are the genre ids as strings, in the dict's iteration order.
L = [str(key) for key in genre_totals]
counts = list(genre_totals.values())
print(L, counts)
print(sum(counts))
plt.bar(L, counts)

plt.show()