Ce serveur Gitlab sera éteint le 30 juin 2020, pensez à migrer vos projets vers les serveurs gitlab-research.centralesupelec.fr et gitlab-student.centralesupelec.fr !

Commit 54ec7d21 authored by SoleneDc's avatar SoleneDc

Merge branch 'master' of /Users/soleneduchamp/Documents/OSY/Vision par...

Merge branch 'master' of /Users/soleneduchamp/Documents/OSY/Vision par ordinateur/Spectrum with conflicts.
parent ac09ba2b
...@@ -21,5 +21,7 @@ spectrumImages2015/ ...@@ -21,5 +21,7 @@ spectrumImages2015/
spectrumImages2016/ spectrumImages2016/
spectrumImages2017/ spectrumImages2017/
spectrumImages/ spectrumImages/
.DS_Store
Results_local.xlsx
Spectrums_sample/ Spectrums_sample/
Results.xlsx Results.xlsx
{365353: [0, [12, 99], '4NJNI-gOLVg'], 497628: [0, [10752, 99, 36], 'C2NGDEh5SLc'], 405307: [0, [16], '5PLzsy0LYs4'], 340183: [0, [35, 28], 'qMwzfbTEcCc'], 337086: [0, [18], 'yrgcLQgP1IA'], 354441: [0, [53], 'bWtYwnk8zTo'], 395939: [0, [99], 'Db18y15lkik'], 354888: [0, [27], '0SW-rOji2T0'], 404051: [0, [99], 'bYb35M28p-M'], 357452: [0, [9648, 80, 18], '88IuZOoGpN4'], 429773: [0, [99], 'qV0u5MifDfE'], 332489: [0, [99], 'HgmlcH3VUZg'], 379644: [0, [18], 'l4GgQuQgvvA'], 353625: [0, [], 'flTNwaj9IUg'], 413171: [0, [28, 18], '2a9iocPixXk'], 509501: [0, [27], '5M9hado4UhE'], 357598: [0, [18, 35], 'gVe1xJSLHMk'], 365582: [0, [99], '0hDMFYIQ-LM'], 354165: [0, [], 'nHx_pcbV0_E'], 363970: [0, [18, 53], '7CqNtibGLNc'], 451404: [0, [53, 18], 'YUbxoXTWSmI'], 466771: [0, [], 'PKCGgHMOtj8'], 357888: [0, [], 'Yt-74kaEF_k'], 469580: [0, [18], '-_ZGzJvse5c'], 353775: [0, [], 'W4nvwBhKPJg'], 341017: [0, [18, 28], 'MZ5U-iWelm0'], 345720: [0, [99], 'I8Bup69WHcg'], 418203: [0, [10749, 35], 'YPNZdWgsjDU'], 348891: [0, [18], 'TL9E5K9VtAw'], 474223: [0, [], 'q8XBYW38mtI'], 449164: [0, [10749, 18], 'Ij_oQmRF4DM'], 377048: [0, [53, 28], 'h-jD4mqRjb4'], 353186: [0, [10402], 'uoO7B3o_k5U'], 340509: [0, [35], '6YIOaAVmmIg'], 466525: [0, [18], 'ZI4btjn1t34'], 465276: [0, [10749, 35], '8WYHTs3EdNc'], 413143: [0, [53, 18], 'lmoMvBRZn_I'], 338603: [0, [99], 'l6FGRryGbqY'], 361611: [0, [99], 'UWi41Rdm0Qs'], 319773: [0, [18, 10749], '_VGBCCljb5g'], 413437: [0, [53, 28, 10749], '3chrcXw1ZQg'], 375656: [0, [18], 'z5hGN1uuMYw'], 504194: [0, [35, 18], 'nJlEyMi5ja8'], 364308: [0, [27], 'PMsvXcbCczM'], 361565: [0, [99], 'Zq8H0rEJ-ss'], 397991: [0, [], '2TDXIp8J4LY'], 421262: [0, [16], '2WjsEHp0Eqo'], 445038: [0, [35], 'hzSVySzaG1I'], 382476: [0, [27, 35], '0A557EFGnAU'], 382050: [0, [27], 'qpLi6NQTMCg'], 362369: [0, [10402, 99], 'brO8BR-kFF0'], 412142: [0, [35], '_cpJ9Q-sqdg'], 331319: [0, [], 'DfA9nK_QZJ8'], 470746: [0, [99], 'FEc_D_meOBY'], 425535: [0, [18, 35], 'AeMvfHaZql4'], 404085: [0, [99], 'zQp7e46OV04'], 447713: [0, [18, 35, 10749], 'SXryT800nS4'], 345176: [0, [35], 'WG9kuUw1ssE'], 443264: [0, [99], 'f1HI4daPWFM'], 360401: [0, [99], 'IlNNu5s7lwI'], 453441: [0, [], 'aky9FFj4ybE'], 428394: [0, [53, 27], 'a_5mOuEO7RY'], 344699: [0, [], 'VIjgJMeINXE'], 466403: [0, [10749, 28, 53], 'GFnwImtVAbw'], 405319: [0, [16], '6ImLlX7V-sU'], 413546: [0, [35], 'mmKHHDzDAyM'], 493283: [0, [35], '89DjhTUXcnM'], 331383: [0, [27], '2OILnfXrNqM'], 338757: [0, [99], 'oOLaSo2-B3E'], 493939: [0, [], 'NmiEjkhjkYI'], 338274: [0, [53], 'Mg9hwk2XzlQ'], 298886: [0, [28, 53], 'Ih1BazoQMQY'], 465126: [0, [36, 18], 'rnFZgdHXnmI'], 393835: [0, [], 'owo_mK3HmLo'], 339818: [0, [99], 'b-gRcXPjHaw'], 505557: [0, [53, 27], 'tP20D69PzsU'], 362277: [0, [], '8DIjyFpXBlk'], 488099: [0, [99], '59j2AxHGKmQ'], 397826: [0, [99], 'gr2AptgjvnE'], 504223: [0, [28, 35, 18], 'PuLisEMlZOQ'], 352455: [0, [], 'QJF6IzXApag'], 506330: [0, [35], 'wor0jrZt4tw'], 345153: [0, [35, 10751], 'p8ffMMdQw2c'], 107679: [0, [], 'i3Bdty8obeE'], 352843: [0, [12, 36, 10402], 'ivJPh39URPY'], 341032: [0, [99], '6KnQl-wbccI'], 339027: [0, [99], 'VT22K0MkQcI'], 333866: [0, [35, 27], '3ZjvbYa8dfo'], 324604: [0, [99], 'Ozp-_pAi0Zs'], 457609: [0, [], 'pJyyuGlT7i0'], 331861: [0, [28, 35], 'Cq4JePOaJZQ'], 372858: [0, [10749, 18], 'oj7taZ5uOTE'], 365375: [0, [18], 'efVmkjEHr4M'], 394714: [0, [], 'Wh7BdvS9FAQ'], 422957: [0, [35, 99], 'Phfz8eDgsNY'], 332507: [0, [53, 18], 'ERBABKtAKDw'], 414344: [0, [35, 10749], 'vOmjTcCrzl0'], 458581: [0, [99], 'jFhsqXb_BUU'], 511013: [0, [18, 35], 'td2j3aA_ktI'], 328725: [0, [], '_9t3p8DztXk'], 490974: [0, [18], 'DLJDgd8EkKs'], 391628: [0, [99], 'eXhy5prz83k'], 339146: [0, [18, 10749], 'HwZZyHcfhfk'], 342745: [0, [99], 's16jiyd7Ago'], 318131: [0, [], 'MNd_1kJ9y5s'], 388172: [0, [878, 18], 's7v7wdhLlew'], 484244: [0, [35], 'mJfoUAXUXiA'], 509562: [0, [36], 'wpFyfLp2CP4'], 351229: [0, [99], 'vGJEc9n5SLM'], 380491: [0, [35, 18, 10749], 'YJlCn5YMwqo'], 328327: [0, [35], 'oTt-XgnOPrY'], 340265: [0, [35, 9648], 'LVgnzAtwCZw'], 316336: [0, [99], 'vAdJg4BWpD4'], 421015: [0, [], 'SgwurgCZdvU'], 366875: [0, [99], 'kash5VkAKkk'], 357899: [0, [99, 36], 'oKz4kV0sSTE'], 486816: [0, [], 'XJUEhZIe6rU'], 492803: [0, [18], '8wKwzWQEk5Q'], 397500: [0, [18], 'VOvD8adaErk'], 331445: [0, [18], '3p5Tc3QKHv0'], 323583: [0, [99, 10402], 'dGtq4HTapDA'], 388971: [0, [10749, 35, 18], 'j43eCUAgWOo'], 361761: [0, [28, 12, 99, 36], '40Jr2ZDHDnY'], 369327: [0, [18, 35], 'HGP5pN48d1w'], 340216: [0, [], 'IL6oCpPQRr8'], 402320: [0, [99], 'W0uohwW4Pfo'], 353857: [0, [27], 'FKuON-_CnYI'], 437523: [0, [10402], 'AXP6bnyqjHw'], 512317: [0, [99, 10402], 'iBMWa3NfqKY'], 318056: [0, [99], 'X3nr1nlTs3g'], 336156: [0, [99], 'cGS2CAsD1ME'], 406839: [0, [10402, 18], 'tHgs7yAjQWY'], 448115: [0, [10749, 18], 'cayOtDuO_6U'], 327381: [0, [18], 'rPu-DN58KuM'], 335589: [0, [18], '_K6XqQvP7xQ'], 328870: [0, [9648, 27], 'qe_5qfulIZM'], 347443: [0, [35], '5SWUdnTO5zI'], 413438: [0, [18, 53, 27], 'uGGeBUJQjNg'], 384308: [0, [], 'ZXitifMbUqQ'], 418926: [0, [], 'qTsA6-Rbw7U'], 335909: [0, [], '3pRgBV_8TT4'], 421685: [0, [], '7nBSdPftSJY'], 414852: [0, [99], 'bDnglcBLTs0'], 510304: [0, [18, 80], 'KburpTTF0Zg'], 429803: [0, [35], 'uwswgMs1nuk'], 436702: [0, [], '6sV3pxB86Y4'], 494815: [0, [35, 10749], 'UA86vDkbPRA'], 402905: [0, [18, 9648, 10749], 'nP4q4MRGfMY'], 375733: [0, [99, 10402], '5ZXkjtO4gjM'], 360728: [0, [10749, 18], 'BvjKfxBCSaU'], 471673: [0, [27, 28], 'crn4W839LrA'], 329327: [0, [18], 'SCfX1gfyag8'], 319971: [0, [35], '_G6TL-Nucts'], 348969: [0, [14, 16], 'gxLN09Kajgc'], 363361: [0, [18, 878, 35], 'dSAeRJBXeIA'], 363901: [0, [], 'WE3t3vB_1TM'], 482603: [0, [35], 'h2dXqtsbby0'], 497856: [0, [53], 'oOmloGfNONc'], 387572: [0, [99], 'OXNCY6bjMP8'], 411189: [0, [35, 10749], '8inxztzBlL4'], 361762: [0, [10402, 99], '58oKcz3UPl4'], 363348: [0, [18], '8_QMDmdqfGM'], 366913: [0, [18, 36], 'SHpwXHCOaGM'], 330008: [0, [], 'EA03oQnQ6IE'], 309885: [0, [53, 27], '3kp3_P0XzzU'], 337042: [0, [], 't_gTgChDBXk'], 407520: [0, [], 'aSZKo-d63sI'], 494578: [0, [35, 18], 'iEG0rDQheWs'], 374644: [0, [99], 'mXhq4EXilQc'], 394717: [0, [], 'dU9358Qhy6Q'], 338590: [0, [99, 36], '_fC2TsHnmoM'], 328950: [0, [878], 'LVvndRkEFa4'], 339341: [0, [99], 'zajcMv4MTMs'], 471895: [0, [35, 27, 14], 'y-_L548ZYIw'], 348651: [0, [28, 16], 'ekz-FY_MDGA'], 324321: [0, [35, 18], 'UoY_J8NDU1s'], 362374: [0, [99], 'zk_jTvYu2i8'], 407512: [0, [], 'UNHgbWuLYH4'], 407514: [0, [], 'CMNFgNHsiBw'], 394605: [0, [99, 36, 10770], 'eYXiwk24pnc'], 232504: [0, [18], '6ttCNr_Mna4'], 449016: [0, [18, 10749], 'Tn7AZVHJhqw'], 314590: [0, [], 'WWjEvwQDLlE'], 295881: [0, [18], 'XOgR0cNtBJU'], 364489: [0, [878, 16, 27, 14], 'WRz8RLotl7s'], 249108: [0, [99], 's3vAH_uTjI0'], 449033: [0, [9648, 53], 'fQMez6DmNrM'], 293630: [0, [], 'sYZWx6IVFb8'], 389224: [0, [35, 16], 'YEl1q90SjkU'], 98593: [0, [], 'nJ8aT7OUSr0'], 374016: [0, [12, 16], 'Ga89i7r5R8A'], 323593: [0, [35, 16], 'kuPoScMR078'], 472302: [0, [27], 'Xo05JyJPxf8'], 102935: [0, [80, 99], 'oBOH-hG-Jug'], 400584: [0, [35, 10402, 37], 'Jx-2EDIzS-g'], 498384: [0, [10402, 99], 'Y3h0CuuPjfs'], 355036: [8.5, [99], 'NCPpvwHRlJs'], 369570: [0, [18], 'Pk5fI3ckcP8'], 429448: [0, [99], 'jOnHBHy9r54'], 427060: [0, [53, 27, 80, 9648], 'WK5w2rYEd4c']}
\ No newline at end of file
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# 1. LSTM" "# 1. CNN"
] ]
}, },
{ {
...@@ -23,17 +23,9 @@ ...@@ -23,17 +23,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 178,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [ "source": [
"import ast\n", "import ast\n",
"import pandas as pd\n", "import pandas as pd\n",
...@@ -63,17 +55,21 @@ ...@@ -63,17 +55,21 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 179,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"years = [2004, 2005, 2006, 2007, 2008, 2009,2010,2011, 2015]" "years = [2004, 2005, 2006, 2007, 2008, 2009, 2010, 2015]"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 180,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"list_of_eligible_spectrums = []\n", "list_of_eligible_spectrums = []\n",
...@@ -100,13 +96,28 @@ ...@@ -100,13 +96,28 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 181,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n",
"do something! Too many points.......\n", "do something! Too many points.......\n",
"do something! Too many points.......\n", "do something! Too many points.......\n",
"do something! Too many points.......\n", "do something! Too many points.......\n",
...@@ -171,8 +182,10 @@ ...@@ -171,8 +182,10 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 182,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"#for element in labels:\n", "#for element in labels:\n",
...@@ -211,9 +224,45 @@ ...@@ -211,9 +224,45 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 183,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"'NoneType' object is not subscriptable\n",
"SpectrumImages2004/3jBFwltrxJw\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2006/BH0MLyu6HjY\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/-7S2u3k-OMU\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/17yQpuf3LRA\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/6HIlyaGAkXo\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/clEBwkjs0sQ\n",
"'NoneType' object is not subscriptable\n",
"SpectrumImages2015/dbEkCWSBckI\n"
]
},
{
"ename": "TypeError",
"evalue": "'NoneType' object is not subscriptable",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-183-bfe3c27511d0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mimg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mimg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: 'NoneType' object is not subscriptable",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-183-bfe3c27511d0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mimg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcv2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mimread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'./SpectrumImages/'\u001b[0m\u001b[0;34m+\u001b[0m \u001b[0mfile\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m'..jpg'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0mimg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mimg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0mimg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mimg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mimg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mdict_inverse\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfile\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'image'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mimg\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: 'NoneType' object is not subscriptable"
]
}
],
"source": [ "source": [
"for file in eligible_links:\n", "for file in eligible_links:\n",
" img = cv2.imread('./SpectrumImages/'+ file + '.jpg', 1)\n", " img = cv2.imread('./SpectrumImages/'+ file + '.jpg', 1)\n",
...@@ -223,7 +272,6 @@ ...@@ -223,7 +272,6 @@
" print(e)\n", " print(e)\n",
" print(file)\n", " print(file)\n",
" img = cv2.imread('./SpectrumImages/'+ file + '..jpg', 1)\n", " img = cv2.imread('./SpectrumImages/'+ file + '..jpg', 1)\n",
" os.rename('./SpectrumImages/'+ file + '..jpg','./SpectrumImages/'+ file + '.jpg')\n",
" img = img[0:1]\n", " img = img[0:1]\n",
" img = img.reshape((img.shape[1], img.shape[2]))\n", " img = img.reshape((img.shape[1], img.shape[2]))\n",
" dict_inverse[file.split('/')[1]]['image'] = img" " dict_inverse[file.split('/')[1]]['image'] = img"
...@@ -231,8 +279,10 @@ ...@@ -231,8 +279,10 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 119,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"df = pd.DataFrame.from_dict(dict_inverse)\n", "df = pd.DataFrame.from_dict(dict_inverse)\n",
...@@ -244,8 +294,76 @@ ...@@ -244,8 +294,76 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 176,
"metadata": {},
"outputs": [],
"source": [
"df_com = df2[df2['genre']== 'comedy']\n",
"df_com = df_com.sample(frac=1)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 177,
"metadata": {}, "metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>image</th>\n",
" </tr>\n",
" <tr>\n",
" <th>genre</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>comedy</th>\n",
" <td>1383</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" image\n",
"genre \n",
"comedy 1383"
]
},
"execution_count": 177,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"#df.describe()" "#df.describe()"
...@@ -253,16 +371,16 @@ ...@@ -253,16 +371,16 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 121,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"(16917, 2)" "(10404, 2)"
] ]
}, },
"execution_count": 12, "execution_count": 121,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
...@@ -273,8 +391,10 @@ ...@@ -273,8 +391,10 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 122,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"df2 = df.dropna(axis=0)" "df2 = df.dropna(axis=0)"
...@@ -282,16 +402,16 @@ ...@@ -282,16 +402,16 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 123,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"(7791, 2)" "(5314, 2)"
] ]
}, },
"execution_count": 14, "execution_count": 123,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
...@@ -302,24 +422,24 @@ ...@@ -302,24 +422,24 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 124,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/html": [ "text/html": [
"<div>\n", "<div>\n",
"<style scoped>\n", "<style>\n",
" .dataframe tbody tr th:only-of-type {\n", " .dataframe thead tr:only-child th {\n",
" vertical-align: middle;\n", " text-align: right;\n",
" }\n", " }\n",
"\n", "\n",
" .dataframe tbody tr th {\n", " .dataframe thead th {\n",
" vertical-align: top;\n", " text-align: left;\n",
" }\n", " }\n",
"\n", "\n",
" .dataframe thead th {\n", " .dataframe tbody tr th {\n",
" text-align: right;\n", " vertical-align: top;\n",
" }\n", " }\n",
"</style>\n", "</style>\n",
"<table border=\"1\" class=\"dataframe\">\n", "<table border=\"1\" class=\"dataframe\">\n",
...@@ -336,15 +456,15 @@ ...@@ -336,15 +456,15 @@
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>action</th>\n", " <th>action</th>\n",
" <td>3362</td>\n", " <td>2280</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>comedy</th>\n", " <th>comedy</th>\n",
" <td>2013</td>\n", " <td>1383</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>drama</th>\n", " <th>drama</th>\n",
" <td>2416</td>\n", " <td>1651</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
...@@ -353,12 +473,12 @@ ...@@ -353,12 +473,12 @@
"text/plain": [ "text/plain": [
" image\n", " image\n",
"genre \n", "genre \n",
"action 3362\n", "action 2280\n",
"comedy 2013\n", "comedy 1383\n",
"drama 2416" "drama 1651"
] ]
}, },
"execution_count": 15, "execution_count": 124,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
...@@ -367,170 +487,627 @@ ...@@ -367,170 +487,627 @@
"df2.groupby('genre').count()" "df2.groupby('genre').count()"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Oversampling"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 72, "execution_count": 125,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"max number of spectrums for oversampling : 3362\n", "Hasard: 42 %\n"
" image\n",
"genre \n",
"action 3362\n",
"comedy 3362\n",
"drama 3362\n"
] ]
} }
], ],
"source": [ "source": [
"df_act = df2[df2['genre']=='action']\n", "dict_num_genres = (df2.groupby('genre').count())['image'].to_dict()\n",
"df_com = df2[df2['genre']=='comedy']\n", "sum_spectrums = 0\n",
"df_dra = df2[df2['genre']=='drama']\n", "popular_genre = 0\n",
"\n", "for num in dict_num_genres.values():\n",
"count_act = df_act.groupby('genre').count()\n", " sum_spectrums += num\n",
"count_com = df_com.groupby('genre').count()\n", " if num > popular_genre:\n",
"count_dra = df_dra.groupby('genre').count()\n", " popular_genre = num\n",
"\n", "hasard = int((popular_genre / sum_spectrums)*100)\n",
"nb_act = count_act.iloc[0,0]\n", "print('Hasard:', int((popular_genre / sum_spectrums)*100), \"%\")"
"nb_com = count_com.iloc[0,0]\n", ]
"nb_dra = count_dra.iloc[0,0]\n", },
"\n", {
"nb_max = max(nb_act,nb_com,nb_dra)\n", "cell_type": "code",
"\n", "execution_count": null,
"print(f\"max number of spectrums for oversampling : {nb_max}\")\n", "metadata": {
"\n", "collapsed": true
"#shuffling\n", },
"df_act = df_act.sample(frac=1)\n", "outputs": [],
"df_com = df_com.sample(frac=1)\n", "source": [
"df_dra = df_dra.sample(frac=1)\n", "df_com = "
"\n", ]
"#adding data\n", },
"df_act = df_act.append(df_act.iloc[:nb_max-nb_act, :])\n", {
"df_com = df_com.append(df_com.iloc[:nb_max-nb_com, :])\n", "cell_type": "code",
"df_dra = df_dra.append(df_dra.iloc[:nb_max-nb_dra, :])\n", "execution_count": 126,
"\n", "metadata": {
"#concatenate\n", "collapsed": true
"df_sym=pd.concat([df_com,df_act,df_dra])\n", },
"print(df_sym.groupby('genre').count())\n", "outputs": [],
"df_sym = pd.get_dummies(df_sym,columns=['genre'])" "source": [
"df3 = pd.get_dummies(df2,columns=['genre'])"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 58, "execution_count": 128,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"'dict_num_genres = (df2.groupby(\\'genre\\').count())[\\'image\\'].to_dict()\\nsum_spectrums = 0\\npopular_genre = 0\\nfor num in dict_num_genres.values():\\n sum_spectrums += num\\n if num > popular_genre:\\n popular_genre = num\\nhasard = int((popular_genre / sum_spectrums)*100)\\nprint(\\'Hasard:\\', int((popular_genre / sum_spectrums)*100), \"%\")'" "897"
] ]
}, },
"execution_count": 58, "execution_count": 128,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"\"\"\"dict_num_genres = (df2.groupby('genre').count())['image'].to_dict()\n", "2280-1383"
"sum_spectrums = 0\n",
"popular_genre = 0\n",
"for num in dict_num_genres.values():\n",
" sum_spectrums += num\n",
" if num > popular_genre:\n",
" popular_genre = num\n",
"hasard = int((popular_genre / sum_spectrums)*100)\n",
"print('Hasard:', int((popular_genre / sum_spectrums)*100), \"%\")\"\"\""
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 78, "execution_count": 129,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>image</th>\n",
" <th>genre_action</th>\n",
" <th>genre_comedy</th>\n",
" <th>genre_drama</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5323</th>\n",
" <td>[[15, 13, 5], [26, 27, 17], [69, 78, 65], [66,...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9303</th>\n",
" <td>[[31, 26, 25], [28, 32, 26], [58, 68, 62], [60...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>268</th>\n",
" <td>[[76, 115, 0], [78, 113, 3], [80, 112, 3], [80...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10345</th>\n",
" <td>[[211, 205, 200], [214, 203, 199], [217, 202, ...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7781</th>\n",
" <td>[[16, 8, 1], [16, 9, 0], [16, 10, 0], [16, 10,...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [ "text/plain": [
"8068" " image genre_action \\\n",
"5323 [[15, 13, 5], [26, 27, 17], [69, 78, 65], [66,... 0 \n",
"9303 [[31, 26, 25], [28, 32, 26], [58, 68, 62], [60... 0 \n",
"268 [[76, 115, 0], [78, 113, 3], [80, 112, 3], [80... 0 \n",
"10345 [[211, 205, 200], [214, 203, 199], [217, 202, ... 0 \n",
"7781 [[16, 8, 1], [16, 9, 0], [16, 10, 0], [16, 10,... 0 \n",
"\n",
" genre_comedy genre_drama \n",
"5323 1 0 \n",
"9303 1 0 \n",
"268 1 0 \n",
"10345 1 0 \n",
"7781 1 0 "
] ]
}, },
"execution_count": 78, "execution_count": 129,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"train_len = int(df_sym.shape[0]*0.8)\n", "df4 = df3.sort_values('genre_comedy')\n",
"train = df_sym.iloc[:train_len, :]\n", "df4.tail()"
"test = df_sym.iloc[train_len:, :]\n",
"print(train['image'].head())\n",
"print(f\"train nb : {train.shape} \")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 79, "execution_count": 130,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [] "source": [
"dfcom = df4.iloc[-897:, :]"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 80, "execution_count": 131,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>image</th>\n",
" <th>genre_action</th>\n",
" <th>genre_comedy</th>\n",