From 8c7915aa5043c20ecf78efc808fcf1917a105c63 Mon Sep 17 00:00:00 2001 From: LoganTilley Date: Thu, 27 May 2021 18:55:13 -0400 Subject: [PATCH] moved notebook to /notebooks, added (some) documentation --- model.ipynb | 1050 ----------------------------------------- models.py | 1 - notebooks/model.ipynb | 536 +++++++++++++++++++++ pages/process.py | 1 - run.py | 2 +- 5 files changed, 537 insertions(+), 1053 deletions(-) delete mode 100644 model.ipynb create mode 100644 notebooks/model.ipynb diff --git a/model.ipynb b/model.ipynb deleted file mode 100644 index b34066a..0000000 --- a/model.ipynb +++ /dev/null @@ -1,1050 +0,0 @@ -{ - "metadata": { - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - }, - "orig_nbformat": 2, - "kernelspec": { - "name": "python388jvsc74a57bd035c03aa0d851cb6285394a035bf26f38eed74d8588e29c3dd9d14089f75b8b7c", - "display_name": "Python 3.8.8 64-bit ('NLP-MTXprD7X': pipenv)" - }, - "metadata": { - "interpreter": { - "hash": "1eb1274d9b46011db3fd7736afbfd78351ce92e021db6530fd6b2630829fc1c2" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2, - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import sklearn as skl\n", - "from tensorflow import keras\n", - "from sklearn.neighbors import kneighbors_graph, NearestNeighbors\n", - "from tensorflow.keras.layers import Dense, LeakyReLU\n", - "from tensorflow.keras.optimizers import Adam, Nadam, RMSprop, SGD\n", - "from tensorflow.keras.utils import plot_model, to_categorical\n", - "from tensorflow.keras.models import Model, Sequential, save_model, load_model\n", - "from tensorflow.config import list_logical_devices\n", 
- "from tensorflow.keras.callbacks import TensorBoard" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "df_full.to_csv('model_ready_data_no_dupes.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "df_full = pd.read_csv(r'C:\\Users\\Logan\\Desktop\\model_ready_data_no_dupes.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "df = df_full.select_dtypes('number')" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "data = df.to_numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(524211, 15)" - ] - }, - "metadata": {}, - "execution_count": 6 - } - ], - "source": [ - "data.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "Index(['name', 'duration_ms', 'explicit', 'artists', 'release_date',\n", - " 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',\n", - " 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',\n", - " 'time_signature', 'popularity'],\n", - " dtype='object')" - ] - }, - "metadata": {}, - "execution_count": 5 - } - ], - "source": [ - "df_full.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": {}, - "outputs": [], - "source": [ - "class AutoEncoder(Model):\n", - " def __init__(self):\n", - " super(AutoEncoder, self).__init__()\n", - " self.encoder = Sequential([\n", - " Dense(64, input_shape=(data.shape[1],), activation='relu'),\n", - " Dense(32, activation='relu'),\n", - " Dense(16, activation='relu'),\n", - " Dense(8, activation='gelu')])\n", - " self.decoder = Sequential([\n", - " Dense(16, 
activation='gelu'),\n", - " Dense(32, activation='relu'),\n", - " Dense(64, activation='relu'),\n", - " Dense(15, activation='relu')])\n", - "\n", - "\n", - " def call(self, x):\n", - " encoded = self.encoder(x)\n", - " decoded = self.decoder(encoded)\n", - " return decoded" - ] - }, - { - "cell_type": "code", - "execution_count": 212, - "metadata": {}, - "outputs": [], - "source": [ - "ae = AutoEncoder()\n", - "ae.compile(optimizer='adam', loss='mae')" - ] - }, - { - "cell_type": "code", - "execution_count": 213, - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Epoch 1/10\n", - "14744/14744 [==============================] - 13s 853us/step - loss: 0.1855 - val_loss: 0.1753\n", - "Epoch 2/10\n", - "14744/14744 [==============================] - 13s 856us/step - loss: 0.1559 - val_loss: 0.1564\n", - "Epoch 3/10\n", - "14744/14744 [==============================] - 12s 839us/step - loss: 0.1524 - val_loss: 0.1531\n", - "Epoch 4/10\n", - "14744/14744 [==============================] - 12s 836us/step - loss: 0.1509 - val_loss: 0.1536\n", - "Epoch 5/10\n", - "14744/14744 [==============================] - 12s 839us/step - loss: 0.1506 - val_loss: 0.1528\n", - "Epoch 6/10\n", - "14744/14744 [==============================] - 12s 836us/step - loss: 0.1504 - val_loss: 0.1520\n", - "Epoch 7/10\n", - "14744/14744 [==============================] - 13s 856us/step - loss: 0.1503 - val_loss: 0.1529\n", - "Epoch 8/10\n", - "14744/14744 [==============================] - 12s 842us/step - loss: 0.1501 - val_loss: 0.1527\n", - "Epoch 9/10\n", - "14744/14744 [==============================] - 12s 830us/step - loss: 0.1500 - val_loss: 0.1521\n", - "Epoch 10/10\n", - "14744/14744 [==============================] - 12s 841us/step - loss: 0.1498 - val_loss: 0.1526\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 213 - } - ], - "source": [ - "ae.fit(data, 
data, shuffle=True, epochs=10, workers=10, use_multiprocessing=True, validation_split=.1)" - ] - }, - { - "cell_type": "code", - "execution_count": 214, - "metadata": {}, - "outputs": [], - "source": [ - "encoded_data = ae.encoder(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 215, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 215 - } - ], - "source": [ - "encoded_data" - ] - }, - { - "cell_type": "code", - "execution_count": 216, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "NearestNeighbors(n_jobs=-1, radius=1)" - ] - }, - "metadata": {}, - "execution_count": 216 - } - ], - "source": [ - "knn = NearestNeighbors(n_neighbors=5, radius=1, n_jobs=-1)\n", - "knn.fit(encoded_data)" - ] - }, - { - "cell_type": "code", - "execution_count": 93, - "metadata": {}, - "outputs": [], - "source": [ - "model_2 = AutoEncoder()\n", - "loss = tf.keras.losses.MeanAbsoluteError()\n", - "model_2.compile(optimizer='nadam', loss=loss)" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Epoch 1/10\n", - "14744/14744 [==============================] - 14s 837us/step - loss: 0.1466 - val_loss: 0.1339\n", - "Epoch 2/10\n", - "14744/14744 [==============================] - 12s 820us/step - loss: 0.1347 - val_loss: 0.1262\n", - "Epoch 3/10\n", - "14744/14744 [==============================] - 12s 824us/step - loss: 0.1037 - val_loss: 0.0880\n", - "Epoch 4/10\n", - "14744/14744 [==============================] - 12s 825us/step - loss: 0.0902 - val_loss: 0.0853\n", - "Epoch 5/10\n", - "14744/14744 [==============================] - 12s 835us/step - loss: 0.0832 - val_loss: 0.0727\n", - "Epoch 6/10\n", - "14744/14744 [==============================] - 12s 835us/step - loss: 0.0753 - val_loss: 0.0731\n", - 
"Epoch 7/10\n", - "14744/14744 [==============================] - 12s 817us/step - loss: 0.0751 - val_loss: 0.0726\n", - "Epoch 8/10\n", - "14744/14744 [==============================] - 12s 811us/step - loss: 0.0750 - val_loss: 0.0714\n", - "Epoch 9/10\n", - "14744/14744 [==============================] - 12s 819us/step - loss: 0.0748 - val_loss: 0.0713\n", - "Epoch 10/10\n", - "14744/14744 [==============================] - 12s 824us/step - loss: 0.0468 - val_loss: 0.0380\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 94 - } - ], - "source": [ - "model_2.fit(data, data, epochs=10, shuffle=True, validation_split=.1)" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "metadata": {}, - "outputs": [], - "source": [ - "encoded_data_2 = model_2.encoder(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 96, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "NearestNeighbors(n_jobs=-1)" - ] - }, - "metadata": {}, - "execution_count": 96 - } - ], - "source": [ - "knn_2 = NearestNeighbors(n_neighbors=5, n_jobs=-1)\n", - "knn_2.fit(encoded_data_2)" - ] - }, - { - "cell_type": "code", - "execution_count": 97, - "metadata": {}, - "outputs": [], - "source": [ - "class AutoEncoder2(Model):\n", - " def __init__(self, alpha):\n", - " super(AutoEncoder2, self).__init__()\n", - " self.encoder = Sequential([\n", - " Dense(32, input_shape=data.shape[1:]),\n", - " LeakyReLU(alpha),\n", - " Dense(16),\n", - " LeakyReLU(alpha),\n", - " Dense(5),\n", - " LeakyReLU(alpha)])\n", - " self.decoder = Sequential([\n", - " Dense(16),\n", - " LeakyReLU(alpha),\n", - " Dense(32),\n", - " LeakyReLU(alpha),\n", - " Dense(15),\n", - " LeakyReLU(alpha)])\n", - " \n", - "\n", - " def call(self, x):\n", - " encoded = self.encoder(x)\n", - " decoded = self.decoder(encoded)\n", - " return decoded" - ] - }, - { - "cell_type": "code", - 
"execution_count": 217, - "metadata": {}, - "outputs": [], - "source": [ - "ae2 = AutoEncoder2(.1)\n", - "ae2.compile(optimizer='adam', loss='mae')" - ] - }, - { - "cell_type": "code", - "execution_count": 218, - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Epoch 1/10\n", - "14744/14744 [==============================] - 11s 716us/step - loss: 0.0616 - val_loss: 0.0483\n", - "Epoch 2/10\n", - "14744/14744 [==============================] - 10s 696us/step - loss: 0.0529 - val_loss: 0.0465\n", - "Epoch 3/10\n", - "14744/14744 [==============================] - 10s 700us/step - loss: 0.0504 - val_loss: 0.0459\n", - "Epoch 4/10\n", - "14744/14744 [==============================] - 10s 689us/step - loss: 0.0490 - val_loss: 0.0445\n", - "Epoch 5/10\n", - "14744/14744 [==============================] - 10s 686us/step - loss: 0.0486 - val_loss: 0.0452\n", - "Epoch 6/10\n", - "14744/14744 [==============================] - 10s 703us/step - loss: 0.0485 - val_loss: 0.0441\n", - "Epoch 7/10\n", - "14744/14744 [==============================] - 10s 680us/step - loss: 0.0484 - val_loss: 0.0442\n", - "Epoch 8/10\n", - "14744/14744 [==============================] - 10s 679us/step - loss: 0.0483 - val_loss: 0.0444\n", - "Epoch 9/10\n", - "14744/14744 [==============================] - 10s 675us/step - loss: 0.0481 - val_loss: 0.0441\n", - "Epoch 10/10\n", - "14744/14744 [==============================] - 10s 672us/step - loss: 0.0476 - val_loss: 0.0423\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 218 - } - ], - "source": [ - "ae2.fit(data, data, epochs=10, validation_split=.1, workers=10, use_multiprocessing=True, shuffle=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 219, - "metadata": {}, - "outputs": [], - "source": [ - "encoded_data_3 = ae2.encoder(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 220, - 
"metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "NearestNeighbors(n_jobs=-1)" - ] - }, - "metadata": {}, - "execution_count": 220 - } - ], - "source": [ - "knn_3 = NearestNeighbors(n_neighbors=5, n_jobs=-1)\n", - "knn_3.fit(encoded_data_3)" - ] - }, - { - "cell_type": "code", - "execution_count": 252, - "metadata": {}, - "outputs": [], - "source": [ - "ae4 = AutoEncoder2(.3)\n", - "ae4.compile(optimizer='rmsprop', loss='mae')" - ] - }, - { - "cell_type": "code", - "execution_count": 253, - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Epoch 1/10\n", - "14744/14744 [==============================] - 10s 642us/step - loss: 0.0648 - val_loss: 0.0542\n", - "Epoch 2/10\n", - "14744/14744 [==============================] - 9s 637us/step - loss: 0.0579 - val_loss: 0.0526\n", - "Epoch 3/10\n", - "14744/14744 [==============================] - 10s 654us/step - loss: 0.0510 - val_loss: 0.0452\n", - "Epoch 4/10\n", - "14744/14744 [==============================] - 10s 649us/step - loss: 0.0485 - val_loss: 0.0428\n", - "Epoch 5/10\n", - "14744/14744 [==============================] - 9s 642us/step - loss: 0.0477 - val_loss: 0.0431\n", - "Epoch 6/10\n", - "14744/14744 [==============================] - 9s 644us/step - loss: 0.0467 - val_loss: 0.0418\n", - "Epoch 7/10\n", - "14744/14744 [==============================] - 9s 631us/step - loss: 0.0455 - val_loss: 0.0399\n", - "Epoch 8/10\n", - "14744/14744 [==============================] - 9s 636us/step - loss: 0.0448 - val_loss: 0.0420\n", - "Epoch 9/10\n", - "14744/14744 [==============================] - 9s 637us/step - loss: 0.0443 - val_loss: 0.0414\n", - "Epoch 10/10\n", - "14744/14744 [==============================] - 9s 642us/step - loss: 0.0434 - val_loss: 0.0386\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 253 - } - ], - 
"source": [ - "ae4.fit(data, data, epochs=10, validation_split=.1, workers=10, use_multiprocessing=True, shuffle=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 257, - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "INFO:tensorflow:Assets written to: ae4\\assets\n" - ] - } - ], - "source": [ - "ae4.save('ae4')" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "ae4 = load_model('ae4')" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "NearestNeighbors(n_jobs=-1)" - ] - }, - "metadata": {}, - "execution_count": 17 - } - ], - "source": [ - "encoded_data_4 = ae4.encoder(data)\n", - "knn4 = NearestNeighbors(n_jobs=-1)\n", - "knn4.fit(encoded_data_4)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "['encoded_data.joblib']" - ] - }, - "metadata": {}, - "execution_count": 19 - } - ], - "source": [ - "joblib.dump(encoded_data_4, 'encoded_data.joblib')" - ] - }, - { - "cell_type": "code", - "execution_count": 255, - "metadata": {}, - "outputs": [], - "source": [ - "query = 72837\n", - "\n", - "_, ind = knn.kneighbors([encoded_data[query]])\n", - "_, ind2 = knn_2.kneighbors([encoded_data_2[query]])\n", - "_, ind3 = knn_3.kneighbors([encoded_data_3[query]])\n", - "_, ind4 = knn4.kneighbors([encoded_data_4[query]])" - ] - }, - { - "cell_type": "code", - "execution_count": 243, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " name duration_ms explicit artists \\\n", - "72837 duality 0.044939 0 [\"'slipknot'\"] \n", - "76442 use somebody 0.041052 0 [\"'kings of leon'\"] \n", - "74796 never too late 0.037240 0 [\"'three days grace'\"] \n", - "72828 mr. 
brightside 0.039666 0 [\"'the killers'\"] \n", - "165065 crazy girl 0.035705 0 [\"'eli young band'\"] \n", - "\n", - " release_date danceability energy key loudness mode \\\n", - "72837 2004 0.354 0.982 0.363636 0.871482 0 \n", - "76442 2008-09-23 0.276 0.715 0.000000 0.835842 1 \n", - "74796 2006-06-25 0.433 0.778 0.181818 0.846182 0 \n", - "72828 2004 0.352 0.911 0.090909 0.837769 1 \n", - "165065 2011-01-01 0.366 0.651 0.363636 0.859000 1 \n", - "\n", - " speechiness acousticness instrumentalness liveness valence \\\n", - "72837 0.1680 0.000237 0.000294 0.2040 0.194 \n", - "76442 0.0432 0.005520 0.000417 0.2010 0.173 \n", - "74796 0.0371 0.008380 0.000000 0.1630 0.217 \n", - "72828 0.0747 0.001210 0.000000 0.0995 0.236 \n", - "165065 0.0310 0.000415 0.005260 0.2710 0.257 \n", - "\n", - " tempo time_signature popularity \n", - "72837 0.583535 0.8 0.78 \n", - "76442 0.556163 0.8 0.77 \n", - "74796 0.608261 0.8 0.70 \n", - "72828 0.600830 0.8 0.82 \n", - "165065 0.614264 0.6 0.69 " - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
nameduration_msexplicitartistsrelease_datedanceabilityenergykeyloudnessmodespeechinessacousticnessinstrumentalnesslivenessvalencetempotime_signaturepopularity
72837duality0.0449390[\"'slipknot'\"]20040.3540.9820.3636360.87148200.16800.0002370.0002940.20400.1940.5835350.80.78
76442use somebody0.0410520[\"'kings of leon'\"]2008-09-230.2760.7150.0000000.83584210.04320.0055200.0004170.20100.1730.5561630.80.77
74796never too late0.0372400[\"'three days grace'\"]2006-06-250.4330.7780.1818180.84618200.03710.0083800.0000000.16300.2170.6082610.80.70
72828mr. brightside0.0396660[\"'the killers'\"]20040.3520.9110.0909090.83776910.07470.0012100.0000000.09950.2360.6008300.80.82
165065crazy girl0.0357050[\"'eli young band'\"]2011-01-010.3660.6510.3636360.85900010.03100.0004150.0052600.27100.2570.6142640.60.69
\n
" - }, - "metadata": {}, - "execution_count": 243 - } - ], - "source": [ - "df_full.iloc[ind[0]]" - ] - }, - { - "cell_type": "code", - "execution_count": 244, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " name duration_ms explicit artists \\\n", - "72837 duality 0.044939 0 [\"'slipknot'\"] \n", - "50622 ace of spades 0.029695 0 [\"'motörhead'\"] \n", - "71173 times like these 0.047242 0 [\"'foo fighters'\"] \n", - "74801 knights of cydonia 0.065148 0 [\"'muse'\"] \n", - "73759 best of you 0.045475 0 [\"'foo fighters'\"] \n", - "\n", - " release_date danceability energy key loudness mode \\\n", - "72837 2004 0.354 0.982 0.363636 0.871482 0 \n", - "50622 1980-11-08 0.329 0.974 0.272727 0.783621 0 \n", - "71173 2002-10-22 0.376 0.908 0.363636 0.859750 0 \n", - "74801 2006-06-19 0.366 0.963 1.000000 0.836683 0 \n", - "73759 2005-06-14 0.366 0.940 0.090909 0.839467 0 \n", - "\n", - " speechiness acousticness instrumentalness liveness valence \\\n", - "72837 0.1680 0.000237 0.000294 0.2040 0.194 \n", - "50622 0.1350 0.000852 0.000118 0.0904 0.234 \n", - "71173 0.0879 0.000014 0.000014 0.2410 0.266 \n", - "74801 0.1420 0.000273 0.012200 0.1150 0.211 \n", - "73759 0.0696 0.000769 0.000094 0.1880 0.369 \n", - "\n", - " tempo time_signature popularity \n", - "72837 0.583535 0.8 0.78 \n", - "50622 0.571724 0.8 0.75 \n", - "71173 0.586819 0.8 0.68 \n", - "74801 0.556512 0.8 0.69 \n", - "73759 0.528442 0.8 0.76 " - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
nameduration_msexplicitartistsrelease_datedanceabilityenergykeyloudnessmodespeechinessacousticnessinstrumentalnesslivenessvalencetempotime_signaturepopularity
72837duality0.0449390[\"'slipknot'\"]20040.3540.9820.3636360.87148200.16800.0002370.0002940.20400.1940.5835350.80.78
50622ace of spades0.0296950[\"'motörhead'\"]1980-11-080.3290.9740.2727270.78362100.13500.0008520.0001180.09040.2340.5717240.80.75
71173times like these0.0472420[\"'foo fighters'\"]2002-10-220.3760.9080.3636360.85975000.08790.0000140.0000140.24100.2660.5868190.80.68
74801knights of cydonia0.0651480[\"'muse'\"]2006-06-190.3660.9631.0000000.83668300.14200.0002730.0122000.11500.2110.5565120.80.69
73759best of you0.0454750[\"'foo fighters'\"]2005-06-140.3660.9400.0909090.83946700.06960.0007690.0000940.18800.3690.5284420.80.76
\n
" - }, - "metadata": {}, - "execution_count": 244 - } - ], - "source": [ - "df_full.iloc[ind2[0]]" - ] - }, - { - "cell_type": "code", - "execution_count": 245, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " name duration_ms \\\n", - "72837 duality 0.044939 \n", - "164331 inside out 0.040267 \n", - "79753 kyoto (feat. sirah) 0.035719 \n", - "352434 final round 0.029492 \n", - "54565 creeping death - creeping death ep version / r... 0.070395 \n", - "\n", - " explicit artists release_date danceability \\\n", - "72837 0 [\"'slipknot'\"] 2004 0.354 \n", - "164331 1 [\"'five finger death punch'\"] 2019-12-02 0.476 \n", - "79753 1 [\"'skrillex'\", \"'sirah'\"] 2011-12-27 0.605 \n", - "352434 0 [\"'free flow flava'\"] 2019-10-04 0.342 \n", - "54565 0 [\"'metallica'\"] 1984-07-26 0.251 \n", - "\n", - " energy key loudness mode speechiness acousticness \\\n", - "72837 0.982 0.363636 0.871482 0 0.1680 0.000237 \n", - "164331 0.991 0.363636 0.874098 0 0.1500 0.001910 \n", - "79753 0.920 0.363636 0.882617 0 0.0713 0.001200 \n", - "352434 0.939 0.363636 0.798168 0 0.1160 0.002480 \n", - "54565 0.978 0.363636 0.833716 0 0.1660 0.000254 \n", - "\n", - " instrumentalness liveness valence tempo time_signature \\\n", - "72837 0.000294 0.2040 0.194 0.583535 0.8 \n", - "164331 0.000000 0.1210 0.202 0.336864 0.8 \n", - "79753 0.000596 0.4830 0.219 0.710177 0.8 \n", - "352434 0.857000 0.1790 0.243 0.696795 0.8 \n", - "54565 0.315000 0.0798 0.170 0.409212 0.8 \n", - "\n", - " popularity \n", - "72837 0.78 \n", - "164331 0.62 \n", - "79753 0.61 \n", - "352434 0.56 \n", - "54565 0.57 " - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
nameduration_msexplicitartistsrelease_datedanceabilityenergykeyloudnessmodespeechinessacousticnessinstrumentalnesslivenessvalencetempotime_signaturepopularity
72837duality0.0449390[\"'slipknot'\"]20040.3540.9820.3636360.87148200.16800.0002370.0002940.20400.1940.5835350.80.78
164331inside out0.0402671[\"'five finger death punch'\"]2019-12-020.4760.9910.3636360.87409800.15000.0019100.0000000.12100.2020.3368640.80.62
79753kyoto (feat. sirah)0.0357191[\"'skrillex'\", \"'sirah'\"]2011-12-270.6050.9200.3636360.88261700.07130.0012000.0005960.48300.2190.7101770.80.61
352434final round0.0294920[\"'free flow flava'\"]2019-10-040.3420.9390.3636360.79816800.11600.0024800.8570000.17900.2430.6967950.80.56
54565creeping death - creeping death ep version / r...0.0703950[\"'metallica'\"]1984-07-260.2510.9780.3636360.83371600.16600.0002540.3150000.07980.1700.4092120.80.57
\n
" - }, - "metadata": {}, - "execution_count": 245 - } - ], - "source": [ - "df_full.iloc[ind3[0]]" - ] - }, - { - "cell_type": "code", - "execution_count": 256, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " name duration_ms explicit artists \\\n", - "72837 duality 0.044939 0 [\"'slipknot'\"] \n", - "71911 bring me to life 0.041965 0 [\"'evanescence'\"] \n", - "77786 eyeless 0.042048 1 [\"'slipknot'\"] \n", - "50622 ace of spades 0.029695 0 [\"'motörhead'\"] \n", - "73246 give 'em hell, kid 0.024699 0 [\"'my chemical romance'\"] \n", - "\n", - " release_date danceability energy key loudness mode \\\n", - "72837 2004 0.354 0.982 0.363636 0.871482 0 \n", - "71911 2003-03-04 0.331 0.943 0.363636 0.869004 0 \n", - "77786 2009-09-09 0.293 0.997 0.363636 0.851016 0 \n", - "50622 1980-11-08 0.329 0.974 0.272727 0.783621 0 \n", - "73246 2004-06-08 0.252 0.993 0.363636 0.870671 0 \n", - "\n", - " speechiness acousticness instrumentalness liveness valence \\\n", - "72837 0.1680 0.000237 0.000294 0.2040 0.194 \n", - "71911 0.0698 0.007210 0.000002 0.2420 0.296 \n", - "77786 0.2170 0.000463 0.000039 0.4150 0.130 \n", - "50622 0.1350 0.000852 0.000118 0.0904 0.234 \n", - "73246 0.1600 0.023400 0.000000 0.2280 0.118 \n", - "\n", - " tempo time_signature popularity \n", - "72837 0.583535 0.8 0.78 \n", - "71911 0.384007 0.8 0.81 \n", - "77786 0.407288 0.8 0.61 \n", - "50622 0.571724 0.8 0.75 \n", - "73246 0.745889 0.8 0.62 " - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
nameduration_msexplicitartistsrelease_datedanceabilityenergykeyloudnessmodespeechinessacousticnessinstrumentalnesslivenessvalencetempotime_signaturepopularity
72837duality0.0449390[\"'slipknot'\"]20040.3540.9820.3636360.87148200.16800.0002370.0002940.20400.1940.5835350.80.78
71911bring me to life0.0419650[\"'evanescence'\"]2003-03-040.3310.9430.3636360.86900400.06980.0072100.0000020.24200.2960.3840070.80.81
77786eyeless0.0420481[\"'slipknot'\"]2009-09-090.2930.9970.3636360.85101600.21700.0004630.0000390.41500.1300.4072880.80.61
50622ace of spades0.0296950[\"'motörhead'\"]1980-11-080.3290.9740.2727270.78362100.13500.0008520.0001180.09040.2340.5717240.80.75
73246give 'em hell, kid0.0246990[\"'my chemical romance'\"]2004-06-080.2520.9930.3636360.87067100.16000.0234000.0000000.22800.1180.7458890.80.62
\n
" - }, - "metadata": {}, - "execution_count": 256 - } - ], - "source": [ - "df_full.iloc[ind4[0]]" - ] - }, - { - "cell_type": "code", - "execution_count": 204, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " name duration_ms explicit artists \\\n", - "70349 people = shit 0.038355 1 [\"'slipknot'\"] \n", - "70486 the heretic anthem 0.045186 1 [\"'slipknot'\"] \n", - "70494 left behind 0.042956 0 [\"'slipknot'\"] \n", - "70657 disasterpiece 0.054847 1 [\"'slipknot'\"] \n", - "70700 my plague 0.039190 1 [\"'slipknot'\"] \n", - "70951 everything ends 0.045205 1 [\"'slipknot'\"] \n", - "72837 duality 0.044939 0 [\"'slipknot'\"] \n", - "72846 before i forget 0.049562 0 [\"'slipknot'\"] \n", - "72985 vermilion, pt. 2 0.039906 0 [\"'slipknot'\"] \n", - "73263 vermilion 0.056334 0 [\"'slipknot'\"] \n", - "73449 the blister exists 0.056825 0 [\"'slipknot'\"] \n", - "73483 pulse of the maggots 0.046206 0 [\"'slipknot'\"] \n", - "73669 the nameless 0.047684 0 [\"'slipknot'\"] \n", - "76500 psychosocial 0.050544 0 [\"'slipknot'\"] \n", - "76564 snuff 0.049126 0 [\"'slipknot'\"] \n", - "76712 dead memories 0.047781 0 [\"'slipknot'\"] \n", - "76910 sulfur 0.049420 0 [\"'slipknot'\"] \n", - "77320 wait and bleed 0.026300 0 [\"'slipknot'\"] \n", - "77530 spit it out 0.028423 1 [\"'slipknot'\"] \n", - "77617 (sic) 0.035558 1 [\"'slipknot'\"] \n", - "77786 eyeless 0.042048 1 [\"'slipknot'\"] \n", - "77887 surfacing 0.038798 1 [\"'slipknot'\"] \n", - "87114 unsainted 0.046391 1 [\"'slipknot'\"] \n", - "106255 the devil in i 0.060987 0 [\"'slipknot'\"] \n", - "121082 all hope is gone 0.050658 0 [\"'slipknot'\"] \n", - "121141 vendetta 0.056128 1 [\"'slipknot'\"] \n", - "121199 liberate 0.032788 1 [\"'slipknot'\"] \n", - "121243 purity 0.047223 1 [\"'slipknot'\"] \n", - "159703 gently 0.052302 0 [\"'slipknot'\"] \n", - "159717 i am hated 0.028077 1 [\"'slipknot'\"] \n", - "159725 the shape 0.038722 1 [\"'slipknot'\"] \n", - 
"160445 opium of the people 0.034261 0 [\"'slipknot'\"] \n", - "161371 'til we die 0.061498 0 [\"'slipknot'\"] \n", - "162793 killpop 0.040039 1 [\"'slipknot'\"] \n", - "162805 custer 0.045269 1 [\"'slipknot'\"] \n", - "162917 the negative one 0.057847 1 [\"'slipknot'\"] \n", - "163910 all out life 0.060509 1 [\"'slipknot'\"] \n", - "164178 nero forte 0.056069 1 [\"'slipknot'\"] \n", - "240672 circle 0.046785 0 [\"'slipknot'\"] \n", - "240692 three nil 0.051291 1 [\"'slipknot'\"] \n", - "289703 scream 0.048386 0 [\"'slipknot'\"] \n", - "289802 prelude 3.0 0.042214 0 [\"'slipknot'\"] \n", - "289807 welcome 0.034763 0 [\"'slipknot'\"] \n", - "289832 the virus of life 0.057893 0 [\"'slipknot'\"] \n", - "322288 eeyore 0.029855 1 [\"'slipknot'\"] \n", - "322305 get this 0.021953 1 [\"'slipknot'\"] \n", - "322308 no life 0.029775 1 [\"'slipknot'\"] \n", - "379072 gematria (the killing name) 0.064316 1 [\"'slipknot'\"] \n", - "379158 only one 0.025707 1 [\"'slipknot'\"] \n", - "419922 me inside 0.028362 1 [\"'slipknot'\"] \n", - "\n", - " release_date danceability energy key loudness mode \\\n", - "70349 2001 0.462 0.996 1.000000 0.872293 0 \n", - "70486 2001 0.451 0.993 0.090909 0.854396 1 \n", - "70494 2001 0.229 0.994 0.545455 0.865287 1 \n", - "70657 2001 0.395 0.988 1.000000 0.855620 0 \n", - "70700 2001 0.279 0.993 1.000000 0.876927 0 \n", - "70951 2001 0.467 0.992 1.000000 0.868545 0 \n", - "72837 2004 0.354 0.982 0.363636 0.871482 0 \n", - "72846 2004 0.291 0.974 0.545455 0.871925 0 \n", - "72985 2004 0.589 0.442 0.363636 0.787491 1 \n", - "73263 2004 0.219 0.989 1.000000 0.872461 0 \n", - "73449 2004 0.392 0.993 0.545455 0.878992 1 \n", - "73483 2004 0.218 0.994 0.636364 0.883780 1 \n", - "73669 2004 0.338 0.983 0.636364 0.880614 1 \n", - "76500 2008-08-20 0.568 0.981 0.181818 0.858281 1 \n", - "76564 2008-08-22 0.544 0.690 0.545455 0.830228 0 \n", - "76712 2008-08-22 0.547 0.963 0.363636 0.882021 0 \n", - "76910 2008-08-22 0.278 0.985 1.000000 0.888185 1 \n", - 
"77320 2009-09-09 0.382 0.996 0.636364 0.854763 1 \n", - "77530 2009-09-09 0.331 0.946 0.636364 0.847421 1 \n", - "77617 2009-09-09 0.380 0.994 0.636364 0.846825 1 \n", - "77786 2009-09-09 0.293 0.997 0.363636 0.851016 0 \n", - "77887 2009-09-09 0.443 0.993 1.000000 0.848675 0 \n", - "87114 2019-08-09 0.427 0.946 0.636364 0.884300 1 \n", - "106255 2014-10-15 0.398 0.939 0.818182 0.873945 0 \n", - "121082 2008-08-22 0.556 0.996 0.090909 0.879268 1 \n", - "121141 2008-08-22 0.338 0.974 0.181818 0.880660 1 \n", - "121199 2009-09-09 0.334 0.990 0.090909 0.857180 1 \n", - "121243 2009-09-09 0.283 0.978 0.545455 0.831926 1 \n", - "159703 2001 0.295 0.961 0.181818 0.847329 1 \n", - "159717 2001 0.435 0.986 0.636364 0.864874 1 \n", - "159725 2001 0.363 0.993 0.636364 0.867046 1 \n", - "160445 2004 0.352 0.995 1.000000 0.892269 1 \n", - "161371 2008-08-22 0.482 0.751 0.363636 0.841394 1 \n", - "162793 2014-10-15 0.306 0.926 0.090909 0.875642 0 \n", - "162805 2014-10-15 0.407 0.980 0.636364 0.893600 1 \n", - "162917 2014-10-15 0.505 0.979 0.000000 0.892300 1 \n", - "163910 2018-10-31 0.499 0.970 0.000000 0.874358 1 \n", - "164178 2019-08-09 0.336 0.978 0.636364 0.881700 1 \n", - "240672 2004 0.353 0.760 0.363636 0.839681 0 \n", - "240692 2004 0.311 0.993 0.636364 0.879772 1 \n", - "289703 2004 0.323 0.982 0.181818 0.868224 1 \n", - "289802 2004 0.279 0.717 0.181818 0.836377 1 \n", - "289807 2004 0.339 0.985 0.545455 0.883856 0 \n", - "289832 2004 0.563 0.983 0.545455 0.844392 0 \n", - "322288 2009-09-09 0.330 0.997 0.636364 0.838641 1 \n", - "322305 2009-09-09 0.350 0.992 0.090909 0.840813 0 \n", - "322308 2009-09-09 0.280 0.983 0.636364 0.847803 1 \n", - "379072 2008-08-22 0.361 0.992 1.000000 0.876621 1 \n", - "379158 2009-09-09 0.302 0.976 0.545455 0.847543 1 \n", - "419922 1999-06-12 0.315 0.989 0.090909 0.848828 1 \n", - "\n", - " speechiness acousticness instrumentalness liveness valence \\\n", - "70349 0.2310 0.000058 0.003770 0.1390 0.1450 \n", - "70486 0.1850 
0.000562 0.004790 0.1520 0.1400 \n", - "70494 0.2200 0.000070 0.005480 0.3180 0.1150 \n", - "70657 0.2460 0.000128 0.000244 0.1340 0.1890 \n", - "70700 0.2090 0.000278 0.000394 0.1970 0.1070 \n", - "70951 0.1750 0.000078 0.001750 0.4100 0.3170 \n", - "72837 0.1680 0.000237 0.000294 0.2040 0.1940 \n", - "72846 0.1710 0.017700 0.000005 0.8860 0.3480 \n", - "72985 0.0291 0.334000 0.000424 0.0987 0.0904 \n", - "73263 0.1600 0.004210 0.015600 0.3750 0.1490 \n", - "73449 0.1340 0.000568 0.006700 0.2720 0.3510 \n", - "73483 0.2910 0.003010 0.000003 0.2280 0.1120 \n", - "73669 0.1800 0.015500 0.000743 0.5020 0.2200 \n", - "76500 0.0887 0.002690 0.002750 0.0243 0.3070 \n", - "76564 0.0425 0.012000 0.009500 0.0615 0.2190 \n", - "76712 0.0744 0.000443 0.025100 0.3530 0.4410 \n", - "76910 0.1290 0.000234 0.000235 0.1040 0.3420 \n", - "77320 0.1040 0.002080 0.000000 0.4170 0.3270 \n", - "77530 0.0775 0.030900 0.000006 0.2540 0.5340 \n", - "77617 0.1240 0.000536 0.001280 0.0956 0.2670 \n", - "77786 0.2170 0.000463 0.000039 0.4150 0.1300 \n", - "77887 0.1010 0.000456 0.043700 0.3070 0.2760 \n", - "87114 0.0469 0.000118 0.025300 0.0604 0.2370 \n", - "106255 0.0648 0.005910 0.000881 0.3570 0.2350 \n", - "121082 0.1710 0.002210 0.000142 0.2650 0.2060 \n", - "121141 0.0648 0.000170 0.001040 0.0271 0.4590 \n", - "121199 0.1870 0.000779 0.000000 0.0744 0.3950 \n", - "121243 0.1800 0.000105 0.073500 0.2520 0.3990 \n", - "159703 0.0997 0.000024 0.670000 0.2030 0.1960 \n", - "159717 0.1820 0.000083 0.000924 0.5720 0.4340 \n", - "159725 0.1930 0.000105 0.000473 0.1390 0.2130 \n", - "160445 0.1810 0.007610 0.000026 0.5110 0.1380 \n", - "161371 0.0467 0.137000 0.000000 0.1620 0.5210 \n", - "162793 0.0912 0.000311 0.000644 0.0776 0.4830 \n", - "162805 0.1070 0.001740 0.000112 0.1190 0.5370 \n", - "162917 0.0715 0.000200 0.001350 0.3340 0.3120 \n", - "163910 0.1160 0.000683 0.136000 0.0871 0.0506 \n", - "164178 0.0846 0.000437 0.000525 0.1380 0.4960 \n", - "240672 0.0344 0.165000 0.024500 
0.3690 0.2730 \n", - "240692 0.2000 0.000232 0.000010 0.2000 0.1050 \n", - "289703 0.1470 0.002690 0.000002 0.3010 0.3210 \n", - "289802 0.0468 0.000317 0.167000 0.2240 0.2100 \n", - "289807 0.1610 0.004630 0.000000 0.2320 0.3950 \n", - "289832 0.1270 0.000219 0.604000 0.9440 0.2200 \n", - "322288 0.2830 0.000502 0.002720 0.4620 0.0595 \n", - "322305 0.2690 0.017600 0.000000 0.7420 0.1110 \n", - "322308 0.1420 0.000233 0.000060 0.3050 0.5000 \n", - "379072 0.1360 0.000189 0.000613 0.2850 0.3200 \n", - "379158 0.1470 0.003830 0.000002 0.5830 0.6810 \n", - "419922 0.1840 0.000389 0.000007 0.1930 0.1450 \n", - "\n", - " tempo time_signature popularity \n", - "70349 0.495460 0.8 0.66 \n", - "70486 0.409232 0.8 0.63 \n", - "70494 0.635694 0.8 0.63 \n", - "70657 0.437063 0.8 0.60 \n", - "70700 0.359415 0.8 0.60 \n", - "70951 0.467605 0.8 0.57 \n", - "72837 0.583535 0.8 0.78 \n", - "72846 0.546812 0.8 0.75 \n", - "72985 0.497960 0.6 0.67 \n", - "73263 0.682151 0.6 0.62 \n", - "73449 0.386791 0.8 0.59 \n", - "73483 0.355015 0.8 0.59 \n", - "73669 0.429976 0.8 0.57 \n", - "76500 0.548524 0.8 0.72 \n", - "76564 0.503131 0.8 0.69 \n", - "76712 0.527886 0.8 0.64 \n", - "76910 0.425370 0.8 0.61 \n", - "77320 0.378864 0.8 0.70 \n", - "77530 0.568084 0.8 0.64 \n", - "77617 0.390960 0.8 0.63 \n", - "77786 0.407288 0.8 0.61 \n", - "77887 0.433779 0.8 0.60 \n", - "87114 0.411172 0.8 0.74 \n", - "106255 0.373515 0.8 0.72 \n", - "121082 0.406123 0.8 0.56 \n", - "121141 0.549848 0.8 0.56 \n", - "121199 0.589177 0.8 0.57 \n", - "121243 0.674476 0.8 0.57 \n", - "159703 0.555570 0.8 0.52 \n", - "159717 0.510681 0.8 0.49 \n", - "159725 0.443305 0.8 0.48 \n", - "160445 0.544937 0.8 0.51 \n", - "161371 0.600890 0.6 0.52 \n", - "162793 0.643191 0.8 0.63 \n", - "162805 0.531965 0.8 0.60 \n", - "162917 0.439157 0.8 0.58 \n", - "163910 0.432493 0.8 0.67 \n", - "164178 0.526997 0.8 0.69 \n", - "240672 0.485102 0.8 0.51 \n", - "240692 0.463806 0.8 0.50 \n", - "289703 0.506171 0.8 0.48 \n", - 
"289802 0.542177 0.6 0.47 \n", - "289807 0.401715 0.8 0.48 \n", - "289832 0.517832 0.8 0.46 \n", - "322288 0.485805 0.8 0.51 \n", - "322305 0.470860 0.8 0.49 \n", - "322308 0.623737 0.8 0.49 \n", - "379072 0.412284 0.8 0.50 \n", - "379158 0.685710 0.8 0.46 \n", - "419922 0.508623 0.8 0.41 " - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
nameduration_msexplicitartistsrelease_datedanceabilityenergykeyloudnessmodespeechinessacousticnessinstrumentalnesslivenessvalencetempotime_signaturepopularity
70349people = shit0.0383551[\"'slipknot'\"]20010.4620.9961.0000000.87229300.23100.0000580.0037700.13900.14500.4954600.80.66
70486the heretic anthem0.0451861[\"'slipknot'\"]20010.4510.9930.0909090.85439610.18500.0005620.0047900.15200.14000.4092320.80.63
70494left behind0.0429560[\"'slipknot'\"]20010.2290.9940.5454550.86528710.22000.0000700.0054800.31800.11500.6356940.80.63
70657disasterpiece0.0548471[\"'slipknot'\"]20010.3950.9881.0000000.85562000.24600.0001280.0002440.13400.18900.4370630.80.60
70700my plague0.0391901[\"'slipknot'\"]20010.2790.9931.0000000.87692700.20900.0002780.0003940.19700.10700.3594150.80.60
70951everything ends0.0452051[\"'slipknot'\"]20010.4670.9921.0000000.86854500.17500.0000780.0017500.41000.31700.4676050.80.57
72837duality0.0449390[\"'slipknot'\"]20040.3540.9820.3636360.87148200.16800.0002370.0002940.20400.19400.5835350.80.78
72846before i forget0.0495620[\"'slipknot'\"]20040.2910.9740.5454550.87192500.17100.0177000.0000050.88600.34800.5468120.80.75
72985vermilion, pt. 20.0399060[\"'slipknot'\"]20040.5890.4420.3636360.78749110.02910.3340000.0004240.09870.09040.4979600.60.67
73263vermilion0.0563340[\"'slipknot'\"]20040.2190.9891.0000000.87246100.16000.0042100.0156000.37500.14900.6821510.60.62
73449the blister exists0.0568250[\"'slipknot'\"]20040.3920.9930.5454550.87899210.13400.0005680.0067000.27200.35100.3867910.80.59
73483pulse of the maggots0.0462060[\"'slipknot'\"]20040.2180.9940.6363640.88378010.29100.0030100.0000030.22800.11200.3550150.80.59
73669the nameless0.0476840[\"'slipknot'\"]20040.3380.9830.6363640.88061410.18000.0155000.0007430.50200.22000.4299760.80.57
76500psychosocial0.0505440[\"'slipknot'\"]2008-08-200.5680.9810.1818180.85828110.08870.0026900.0027500.02430.30700.5485240.80.72
76564snuff0.0491260[\"'slipknot'\"]2008-08-220.5440.6900.5454550.83022800.04250.0120000.0095000.06150.21900.5031310.80.69
76712dead memories0.0477810[\"'slipknot'\"]2008-08-220.5470.9630.3636360.88202100.07440.0004430.0251000.35300.44100.5278860.80.64
76910sulfur0.0494200[\"'slipknot'\"]2008-08-220.2780.9851.0000000.88818510.12900.0002340.0002350.10400.34200.4253700.80.61
77320wait and bleed0.0263000[\"'slipknot'\"]2009-09-090.3820.9960.6363640.85476310.10400.0020800.0000000.41700.32700.3788640.80.70
77530spit it out0.0284231[\"'slipknot'\"]2009-09-090.3310.9460.6363640.84742110.07750.0309000.0000060.25400.53400.5680840.80.64
77617(sic)0.0355581[\"'slipknot'\"]2009-09-090.3800.9940.6363640.84682510.12400.0005360.0012800.09560.26700.3909600.80.63
77786eyeless0.0420481[\"'slipknot'\"]2009-09-090.2930.9970.3636360.85101600.21700.0004630.0000390.41500.13000.4072880.80.61
77887surfacing0.0387981[\"'slipknot'\"]2009-09-090.4430.9931.0000000.84867500.10100.0004560.0437000.30700.27600.4337790.80.60
87114unsainted0.0463911[\"'slipknot'\"]2019-08-090.4270.9460.6363640.88430010.04690.0001180.0253000.06040.23700.4111720.80.74
106255the devil in i0.0609870[\"'slipknot'\"]2014-10-150.3980.9390.8181820.87394500.06480.0059100.0008810.35700.23500.3735150.80.72
121082all hope is gone0.0506580[\"'slipknot'\"]2008-08-220.5560.9960.0909090.87926810.17100.0022100.0001420.26500.20600.4061230.80.56
121141vendetta0.0561281[\"'slipknot'\"]2008-08-220.3380.9740.1818180.88066010.06480.0001700.0010400.02710.45900.5498480.80.56
121199liberate0.0327881[\"'slipknot'\"]2009-09-090.3340.9900.0909090.85718010.18700.0007790.0000000.07440.39500.5891770.80.57
121243purity0.0472231[\"'slipknot'\"]2009-09-090.2830.9780.5454550.83192610.18000.0001050.0735000.25200.39900.6744760.80.57
159703gently0.0523020[\"'slipknot'\"]20010.2950.9610.1818180.84732910.09970.0000240.6700000.20300.19600.5555700.80.52
159717i am hated0.0280771[\"'slipknot'\"]20010.4350.9860.6363640.86487410.18200.0000830.0009240.57200.43400.5106810.80.49
159725the shape0.0387221[\"'slipknot'\"]20010.3630.9930.6363640.86704610.19300.0001050.0004730.13900.21300.4433050.80.48
160445opium of the people0.0342610[\"'slipknot'\"]20040.3520.9951.0000000.89226910.18100.0076100.0000260.51100.13800.5449370.80.51
161371'til we die0.0614980[\"'slipknot'\"]2008-08-220.4820.7510.3636360.84139410.04670.1370000.0000000.16200.52100.6008900.60.52
162793killpop0.0400391[\"'slipknot'\"]2014-10-150.3060.9260.0909090.87564200.09120.0003110.0006440.07760.48300.6431910.80.63
162805custer0.0452691[\"'slipknot'\"]2014-10-150.4070.9800.6363640.89360010.10700.0017400.0001120.11900.53700.5319650.80.60
162917the negative one0.0578471[\"'slipknot'\"]2014-10-150.5050.9790.0000000.89230010.07150.0002000.0013500.33400.31200.4391570.80.58
163910all out life0.0605091[\"'slipknot'\"]2018-10-310.4990.9700.0000000.87435810.11600.0006830.1360000.08710.05060.4324930.80.67
164178nero forte0.0560691[\"'slipknot'\"]2019-08-090.3360.9780.6363640.88170010.08460.0004370.0005250.13800.49600.5269970.80.69
240672circle0.0467850[\"'slipknot'\"]20040.3530.7600.3636360.83968100.03440.1650000.0245000.36900.27300.4851020.80.51
240692three nil0.0512911[\"'slipknot'\"]20040.3110.9930.6363640.87977210.20000.0002320.0000100.20000.10500.4638060.80.50
289703scream0.0483860[\"'slipknot'\"]20040.3230.9820.1818180.86822410.14700.0026900.0000020.30100.32100.5061710.80.48
289802prelude 3.00.0422140[\"'slipknot'\"]20040.2790.7170.1818180.83637710.04680.0003170.1670000.22400.21000.5421770.60.47
289807welcome0.0347630[\"'slipknot'\"]20040.3390.9850.5454550.88385600.16100.0046300.0000000.23200.39500.4017150.80.48
289832the virus of life0.0578930[\"'slipknot'\"]20040.5630.9830.5454550.84439200.12700.0002190.6040000.94400.22000.5178320.80.46
322288eeyore0.0298551[\"'slipknot'\"]2009-09-090.3300.9970.6363640.83864110.28300.0005020.0027200.46200.05950.4858050.80.51
322305get this0.0219531[\"'slipknot'\"]2009-09-090.3500.9920.0909090.84081300.26900.0176000.0000000.74200.11100.4708600.80.49
322308no life0.0297751[\"'slipknot'\"]2009-09-090.2800.9830.6363640.84780310.14200.0002330.0000600.30500.50000.6237370.80.49
379072gematria (the killing name)0.0643161[\"'slipknot'\"]2008-08-220.3610.9921.0000000.87662110.13600.0001890.0006130.28500.32000.4122840.80.50
379158only one0.0257071[\"'slipknot'\"]2009-09-090.3020.9760.5454550.84754310.14700.0038300.0000020.58300.68100.6857100.80.46
419922me inside0.0283621[\"'slipknot'\"]1999-06-120.3150.9890.0909090.84882810.18400.0003890.0000070.19300.14500.5086230.80.41
\n
" - }, - "metadata": {}, - "execution_count": 204 - } - ], - "source": [ - "df_full[df_full['artists'].str.contains('slipkn') == True].head(50)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "model = load_model('ae4')" - ] - } - ] -} \ No newline at end of file diff --git a/models.py b/models.py index 6871aee..1a24f71 100644 --- a/models.py +++ b/models.py @@ -32,5 +32,4 @@ class spotify(db.Model): def __repr__(self): return f"" - \ No newline at end of file diff --git a/notebooks/model.ipynb b/notebooks/model.ipynb new file mode 100644 index 0000000..6bef229 --- /dev/null +++ b/notebooks/model.ipynb @@ -0,0 +1,536 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python388jvsc74a57bd035c03aa0d851cb6285394a035bf26f38eed74d8588e29c3dd9d14089f75b8b7c", + "display_name": "Python 3.8.8 64-bit ('NLP-MTXprD7X': pipenv)" + }, + "metadata": { + "interpreter": { + "hash": "1eb1274d9b46011db3fd7736afbfd78351ce92e021db6530fd6b2630829fc1c2" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import sklearn as skl\n", + "from tensorflow import keras\n", + "from sklearn.neighbors import kneighbors_graph, NearestNeighbors\n", + "from tensorflow.keras.layers import Dense, LeakyReLU\n", + "from tensorflow.keras.optimizers import Adam, Nadam, RMSprop, SGD\n", + "from tensorflow.keras.utils import plot_model, to_categorical\n", + "from tensorflow.keras.models import Model, Sequential, save_model, load_model\n", 
+ "from tensorflow.config import list_logical_devices\n", + "from tensorflow.keras.callbacks import TensorBoard" + ] + }, + { + "source": [ + "### Data Preparation\n", + "\n", + "#### We removed null values and entries with duplicate song name / artist pairs, and normalized the numerical features for use in a neural network.\n", + "#### We also case-normalized the text in the data to make things a bit easier on the queries.\n", + "\n", + "### Model Architecture\n", + "\n", + "#### I tried 4 different models built with 2 different major architectures. Both architectures are autoencoders, but they differ slightly.\n", + "\n", + "#### The first model architecture is deeper and wider than the second, with a larger latent vector. After I tried a few different optimizers and loss functions,\n", + "\n", + "#### I settled on mean absolute error for the loss function for each model as it gave the best-looking results.\n", + "\n", + "#### The second model architecture uses LeakyReLU activation functions, is smaller, and has a smaller latent vector. The second model made from this architecture, given\n", + "#### the very plain name ae4 (autoencoder 4), utilizes RMSProp as the optimizer, and each LeakyReLU has an alpha of 0.3.\n", + "\n", + "#### After looking at the output for each model with various songs, it seemed that ae4 had the most consistently understandable recommendations, so I saved that model, its encoded vectors for the entire dataset, and the K-NearestNeighbors model used to relate the encoded vectors for recommendation. These are what the application uses."
+ ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# df_full.to_csv('model_ready_data_no_dupes.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Completely cleaned data\n", + "df_full = pd.read_csv(r'C:\\Users\\Logan\\Desktop\\model_ready_data_no_dupes.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Save numerical features for model in separate dataframe\n", + "df = df_full.select_dtypes('number')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Convert to numpy array\n", + "data = df.to_numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(524211, 15)" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Index(['duration_ms', 'explicit', 'danceability', 'energy', 'key', 'loudness',\n", + " 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness',\n", + " 'valence', 'tempo', 'time_signature', 'popularity'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Index(['name', 'duration_ms', 'explicit', 'artists', 'release_date',\n", + " 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',\n", + " 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',\n", + " 'time_signature', 
'popularity'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "df_full.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Tensorflow subclass API\n", + "\n", + "class AutoEncoder(Model):\n", + " def __init__(self):\n", + " #Inherit init from Model base class\n", + " super(AutoEncoder, self).__init__()\n", + "\n", + " # Encoder portion utilizing Keras Sequential\n", + " self.encoder = Sequential([\n", + " Dense(64, input_shape=(data.shape[1],), activation='relu'),\n", + " Dense(32, activation='relu'),\n", + " Dense(16, activation='relu'),\n", + " Dense(8, activation='gelu')])\n", + " # Decoder portion utilizing Keras Sequential\n", + " self.decoder = Sequential([\n", + " Dense(16, activation='gelu'),\n", + " Dense(32, activation='relu'),\n", + " Dense(64, activation='relu'),\n", + " Dense(15, activation='relu')])\n", + "\n", + " # This function is used by fit to pass data through both the encoder and decoder\n", + " def call(self, x):\n", + " encoded = self.encoder(x)\n", + " decoded = self.decoder(encoded)\n", + " return decoded" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ae = AutoEncoder()\n", + "ae.compile(optimizer='adam', loss='mae')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ae.fit(data, data, shuffle=True, epochs=10, workers=10, use_multiprocessing=True, validation_split=.1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "encoded_data = ae.encoder(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "encoded_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "knn =
NearestNeighbors(n_neighbors=5, radius=1, n_jobs=-1)\n", + "knn.fit(encoded_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_2 = AutoEncoder()\n", + "loss = tf.keras.losses.MeanAbsoluteError()\n", + "model_2.compile(optimizer='nadam', loss=loss)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_2.fit(data, data, epochs=10, shuffle=True, validation_split=.1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "encoded_data_2 = model_2.encoder(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "knn_2 = NearestNeighbors(n_neighbors=5, n_jobs=-1)\n", + "knn_2.fit(encoded_data_2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Same as above, just a different architecture\n", + "class AutoEncoder2(Model):\n", + " def __init__(self, alpha):\n", + " super(AutoEncoder2, self).__init__()\n", + "\n", + " self.encoder = Sequential([\n", + " Dense(32, input_shape=data.shape[1:]),\n", + " LeakyReLU(alpha),\n", + " Dense(16),\n", + " LeakyReLU(alpha),\n", + " Dense(5),\n", + " LeakyReLU(alpha)])\n", + "\n", + " self.decoder = Sequential([\n", + " Dense(16),\n", + " LeakyReLU(alpha),\n", + " Dense(32),\n", + " LeakyReLU(alpha),\n", + " Dense(15),\n", + " LeakyReLU(alpha)])\n", + " \n", + "\n", + " def call(self, x):\n", + " encoded = self.encoder(x)\n", + " decoded = self.decoder(encoded)\n", + " return decoded" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ae2 = AutoEncoder2(.1)\n", + "ae2.compile(optimizer='adam', loss='mae')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ae2.fit(data, data, 
epochs=10, validation_split=.1, workers=10, use_multiprocessing=True, shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "encoded_data_3 = ae2.encoder(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "knn_3 = NearestNeighbors(n_neighbors=5, n_jobs=-1)\n", + "knn_3.fit(encoded_data_3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ae4 = AutoEncoder2(.3)\n", + "ae4.compile(optimizer='rmsprop', loss='mae')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ae4.fit(data, data, epochs=10, validation_split=.1, workers=10, use_multiprocessing=True, shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ae4.save('ae4')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "ae4 = load_model('assets/ae4')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "NearestNeighbors(n_jobs=-1)" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "encoded_data_4 = ae4.encoder(data)\n", + "knn4 = NearestNeighbors(n_jobs=-1)\n", + "knn4.fit(encoded_data_4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# joblib.dump(encoded_data_4, 'encoded_data.joblib')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "query = 72837\n", + "\n", + "# _, ind = knn.kneighbors([encoded_data[query]])\n", + "# _, ind2 = knn_2.kneighbors([encoded_data_2[query]])\n", + "# _, ind3 = knn_3.kneighbors([encoded_data_3[query]])\n", + "_, 
ind4 = knn4.kneighbors([encoded_data_4[query]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_full.iloc[ind[0]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_full.iloc[ind2[0]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_full.iloc[ind3[0]]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name duration_ms explicit artists \\\n", + "72837 duality 0.044939 0 [\"'slipknot'\"] \n", + "71911 bring me to life 0.041965 0 [\"'evanescence'\"] \n", + "77786 eyeless 0.042048 1 [\"'slipknot'\"] \n", + "50622 ace of spades 0.029695 0 [\"'motörhead'\"] \n", + "73246 give 'em hell, kid 0.024699 0 [\"'my chemical romance'\"] \n", + "\n", + " release_date danceability energy key loudness mode \\\n", + "72837 2004 0.354 0.982 0.363636 0.871482 0 \n", + "71911 2003-03-04 0.331 0.943 0.363636 0.869004 0 \n", + "77786 2009-09-09 0.293 0.997 0.363636 0.851016 0 \n", + "50622 1980-11-08 0.329 0.974 0.272727 0.783621 0 \n", + "73246 2004-06-08 0.252 0.993 0.363636 0.870671 0 \n", + "\n", + " speechiness acousticness instrumentalness liveness valence \\\n", + "72837 0.1680 0.000237 0.000294 0.2040 0.194 \n", + "71911 0.0698 0.007210 0.000002 0.2420 0.296 \n", + "77786 0.2170 0.000463 0.000039 0.4150 0.130 \n", + "50622 0.1350 0.000852 0.000118 0.0904 0.234 \n", + "73246 0.1600 0.023400 0.000000 0.2280 0.118 \n", + "\n", + " tempo time_signature popularity \n", + "72837 0.583535 0.8 0.78 \n", + "71911 0.384007 0.8 0.81 \n", + "77786 0.407288 0.8 0.61 \n", + "50622 0.571724 0.8 0.75 \n", + "73246 0.745889 0.8 0.62 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
nameduration_msexplicitartistsrelease_datedanceabilityenergykeyloudnessmodespeechinessacousticnessinstrumentalnesslivenessvalencetempotime_signaturepopularity
72837duality0.0449390[\"'slipknot'\"]20040.3540.9820.3636360.87148200.16800.0002370.0002940.20400.1940.5835350.80.78
71911bring me to life0.0419650[\"'evanescence'\"]2003-03-040.3310.9430.3636360.86900400.06980.0072100.0000020.24200.2960.3840070.80.81
77786eyeless0.0420481[\"'slipknot'\"]2009-09-090.2930.9970.3636360.85101600.21700.0004630.0000390.41500.1300.4072880.80.61
50622ace of spades0.0296950[\"'motörhead'\"]1980-11-080.3290.9740.2727270.78362100.13500.0008520.0001180.09040.2340.5717240.80.75
73246give 'em hell, kid0.0246990[\"'my chemical romance'\"]2004-06-080.2520.9930.3636360.87067100.16000.0234000.0000000.22800.1180.7458890.80.62
\n
" + }, + "metadata": {}, + "execution_count": 14 + } + ], + "source": [ + "df_full.iloc[ind4[0]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_full[df_full['artists'].str.contains('slipkn') == True].head(50)" + ] + } + ] +} \ No newline at end of file diff --git a/pages/process.py b/pages/process.py index 1299c58..ebbcf90 100644 --- a/pages/process.py +++ b/pages/process.py @@ -17,7 +17,6 @@ ## Process - """ ), diff --git a/run.py b/run.py index 7dbff3f..60b4c32 100644 --- a/run.py +++ b/run.py @@ -79,4 +79,4 @@ def display_page(pathname): # Run app server: https://dash.plot.ly/getting-started if __name__ == '__main__': - app.run_server(debug=True) \ No newline at end of file + app.run_server(debug=True)