Ce serveur Gitlab sera éteint le 30 juin 2020, pensez à migrer vos projets vers les serveurs gitlab-research.centralesupelec.fr et gitlab-student.centralesupelec.fr !

Commit adc8a632 authored by Hachemin Pierre-Yves

valid_spectrum list + oversampling

parent 2e54a289
......@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# 1. CNN"
"# 1. LSTM"
]
},
{
......@@ -64,20 +64,16 @@
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"years = [2004, 2005, 2006, 2007, 2008, 2015]"
"years = [2004, 2005, 2006, 2007, 2008, 2009,2010,2011, 2015]"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {
"collapsed": true
},
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"list_of_eligible_spectrums = []\n",
......@@ -104,25 +100,13 @@
},
{
"cell_type": "code",
"execution_count": 116,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
......@@ -187,10 +171,8 @@
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {
"collapsed": true
},
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"#for element in labels:\n",
......@@ -229,46 +211,9 @@
},
{
"cell_type": "code",
"execution_count": 118,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"'NoneType' object is not subscriptable\n",
"SpectrumImages2004/3jBFwltrxJw\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2006/BH0MLyu6HjY\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2008/1Gl2kVUsy2M\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/-7S2u3k-OMU\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/17yQpuf3LRA\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/6HIlyaGAkXo\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/clEBwkjs0sQ\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/DNg9Oa5EHsc\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/hsuKq5pNOcM\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/j6e6Nc1emwg\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/KHGHEpUeUwo\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/ngesu4t3oKc\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/tOilM3Ze-us\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/uigIV1ALQYQ\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/Xq6XgPSgzmA\n"
]
}
],
"outputs": [],
"source": [
"for file in eligible_links:\n",
" img = cv2.imread('./SpectrumImages/'+ file + '.jpg', 1)\n",
......@@ -278,6 +223,7 @@
" print(e)\n",
" print(file)\n",
" img = cv2.imread('./SpectrumImages/'+ file + '..jpg', 1)\n",
" os.rename('./SpectrumImages/'+ file + '..jpg','./SpectrumImages/'+ file + '.jpg')\n",
" img = img[0:1]\n",
" img = img.reshape((img.shape[1], img.shape[2]))\n",
" dict_inverse[file.split('/')[1]]['image'] = img"
......@@ -285,10 +231,8 @@
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {
"collapsed": true
},
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame.from_dict(dict_inverse)\n",
......@@ -300,10 +244,8 @@
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {
"collapsed": true
},
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"#df.describe()"
......@@ -311,16 +253,16 @@
},
{
"cell_type": "code",
"execution_count": 121,
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(10404, 2)"
"(16917, 2)"
]
},
"execution_count": 121,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
......@@ -331,10 +273,8 @@
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {
"collapsed": true
},
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"df2 = df.dropna(axis=0)"
......@@ -342,16 +282,16 @@
},
{
"cell_type": "code",
"execution_count": 123,
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(5314, 2)"
"(7791, 2)"
]
},
"execution_count": 123,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
......@@ -362,25 +302,25 @@
},
{
"cell_type": "code",
"execution_count": 124,
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
......@@ -396,15 +336,15 @@
" <tbody>\n",
" <tr>\n",
" <th>action</th>\n",
" <td>2280</td>\n",
" <td>3362</td>\n",
" </tr>\n",
" <tr>\n",
" <th>comedy</th>\n",
" <td>1383</td>\n",
" <td>2013</td>\n",
" </tr>\n",
" <tr>\n",
" <th>drama</th>\n",
" <td>1651</td>\n",
" <td>2416</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
......@@ -413,12 +353,12 @@
"text/plain": [
" image\n",
"genre \n",
"action 2280\n",
"comedy 1383\n",
"drama 1651"
"action 3362\n",
"comedy 2013\n",
"drama 2416"
]
},
"execution_count": 124,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
......@@ -427,6 +367,88 @@
"df2.groupby('genre').count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Oversampling"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"max number of spectrums for oversampling : 3362\n"
]
}
],
"source": [
"df_act = df2[df2['genre']=='action']\n",
"df_com = df2[df2['genre']=='comedy']\n",
"df_dra = df2[df2['genre']=='drama']\n",
"\n",
"count_act = df_act.groupby('genre').count()\n",
"count_com = df_com.groupby('genre').count()\n",
"count_dra = df_dra.groupby('genre').count()\n",
"\n",
"nb_act = count_act.iloc[0,0]\n",
"nb_com = count_com.iloc[0,0]\n",
"nb_dra = count_dra.iloc[0,0]\n",
"\n",
"nb_max = max(nb_act,nb_com,nb_dra)\n",
"\n",
"print(f\"max number of spectrums for oversampling : {nb_max}\")"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"#shuffling\n",
"df_act = df_act.sample(frac=1)\n",
"df_com = df_com.sample(frac=1)\n",
"df_dra = df_dra.sample(frac=1)\n",
"\n",
"#adding data\n",
"df_act = df_act.append(df_act.iloc[:nb_max-nb_act, :])\n",
"df_com = df_com.append(df_com.iloc[:nb_max-nb_com, :])\n",
"df_dra = df_dra.append(df_dra.iloc[:nb_max-nb_dra, :])"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" image\n",
"genre \n",
"action 3362\n",
" image\n",
"genre \n",
"comedy 3362\n",
" image\n",
"genre \n",
"drama 3362\n"
]
}
],
"source": [
"print(df_act.groupby('genre').count())\n",
"print(df_com.groupby('genre').count())\n",
"print(df_dra.groupby('genre').count())"
]
},
{
"cell_type": "code",
"execution_count": 125,
......@@ -1353,13 +1375,7 @@
"3830/3830 [==============================] - 24s 6ms/step - loss: 1.0636 - acc: 0.4245 - val_loss: 1.0572 - val_acc: 0.4111\n",
"Epoch 59/100\n",
"3830/3830 [==============================] - 19s 5ms/step - loss: 1.0667 - acc: 0.4274 - val_loss: 1.0578 - val_acc: 0.4099\n",
"Epoch 60/100\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 60/100\n",
"3830/3830 [==============================] - 18s 5ms/step - loss: 1.0656 - acc: 0.4094 - val_loss: 1.0569 - val_acc: 0.4099\n",
"Epoch 61/100\n",
"3830/3830 [==============================] - 18s 5ms/step - loss: 1.0643 - acc: 0.4313 - val_loss: 1.0553 - val_acc: 0.4117\n",
......@@ -1640,13 +1656,7 @@
"3830/3830 [==============================] - 16s 4ms/step - loss: 1.0487 - acc: 0.4483 - val_loss: 1.0438 - val_acc: 0.4354\n",
"Epoch 59/100\n",
"3830/3830 [==============================] - 16s 4ms/step - loss: 1.0406 - acc: 0.4499 - val_loss: 1.0406 - val_acc: 0.4446\n",
"Epoch 60/100\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 60/100\n",
"3830/3830 [==============================] - 16s 4ms/step - loss: 1.0475 - acc: 0.4384 - val_loss: 1.0416 - val_acc: 0.4397\n",
"Epoch 61/100\n",
"3830/3830 [==============================] - 16s 4ms/step - loss: 1.0472 - acc: 0.4499 - val_loss: 1.0466 - val_acc: 0.4306\n",
......@@ -1921,13 +1931,7 @@
"3830/3830 [==============================] - 16s 4ms/step - loss: 1.0391 - acc: 0.4527 - val_loss: 1.0370 - val_acc: 0.4470\n",
"Epoch 59/100\n",
"3830/3830 [==============================] - 16s 4ms/step - loss: 1.0436 - acc: 0.4530 - val_loss: 1.0401 - val_acc: 0.4446\n",
"Epoch 60/100\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 60/100\n",
"3830/3830 [==============================] - 16s 4ms/step - loss: 1.0344 - acc: 0.4525 - val_loss: 1.0394 - val_acc: 0.4513\n",
"Epoch 61/100\n",
"3830/3830 [==============================] - 16s 4ms/step - loss: 1.0446 - acc: 0.4491 - val_loss: 1.0382 - val_acc: 0.4476\n",
......@@ -2100,7 +2104,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
"version": "3.6.4"
}
},
"nbformat": 4,
......
......@@ -24,5 +24,7 @@ def valid_spectrums(year):
os.remove(path + "/" + file)
print(ok_files)
# valid_spectrums(year)
# Run valid_spectrums once for every year listed below. The same set of
# years is used by the `years` list in the notebook, so keep both in sync.
years = [2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2015]
for year in years:
    valid_spectrums(year)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment