Skip to content

Commit

Permalink
Refactor data loading
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrown1618 committed Apr 27, 2023
1 parent fc1470e commit 39f6ff0
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 29 deletions.
Empty file added data/__init__.py
Empty file.
9 changes: 9 additions & 0 deletions data/provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
class DataProvider:
    """Singleton placeholder: every ``DataProvider()`` call yields one shared object."""

    # The single shared instance; created lazily on first construction.
    _instance = None

    def __new__(cls):
        """Return the shared instance, creating it on the first call."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)

        return cls._instance

67 changes: 67 additions & 0 deletions data/raw.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import pandas as pd

from utils.path import data_path


# Names that may be used as keys when indexing RawData; any other key is rejected.
valid_data_sources = {
    "album_artist",
    "albums",
    "artist_genre",
    "artists",
    "audio_features",
    "liked_tracks",
    "playlist_track",
    "playlists",
    "top_artists",
    "top_tracks",
    "track_artist",
    "tracks",
}

# Column-name prefix applied to each data source's columns after loading.
# Sources that are not listed here keep their column names unchanged.
df_prefixes = {
    "albums": "album_",
    "tracks": "track_",
    "audio_features": "audio_",
    "playlists": "playlist_",
    "artists": "artist_",
}


class RawData:
    """Singleton, lazily populated cache of the raw CSV data sources.

    Index it with a data source name (``RawData()["albums"]``) to get that
    source as a DataFrame. Each source is read from disk at most once; later
    lookups return the same cached DataFrame object.
    """

    # The single shared instance; created lazily on first construction.
    _instance = None

    def __new__(cls):
        """Return the shared instance, creating it and its cache on first use."""
        if cls._instance is None:
            instance = super().__new__(cls)
            # The cache is created here rather than in __init__ because Python
            # invokes __init__ on every RawData() call, even when __new__ hands
            # back the existing instance; resetting the dict there would throw
            # away everything that had already been loaded.
            instance._data = {}
            cls._instance = instance

        return cls._instance

    def __getitem__(self, key) -> pd.DataFrame:
        """Return the DataFrame for data source ``key``, loading it if needed.

        On first access the CSV at ``data_path(key)`` is read and, if the
        source has an entry in ``df_prefixes``, its columns are renamed in
        place via ``prefix_df`` before the frame is cached.

        Raises:
            RuntimeError: if ``key`` is not in ``valid_data_sources``.
        """
        if key not in valid_data_sources:
            raise RuntimeError(f'Invalid data source {key}')

        if key not in self._data:
            df = pd.read_csv(data_path(key))
            prefix = df_prefixes.get(key)
            if prefix is not None:
                # Every known prefix is passed so columns that already carry
                # one of them are left untouched.
                prefix_df(df, prefix, set(df_prefixes.values()))

            self._data[key] = df

        return self._data[key]


def prefix_df(df: pd.DataFrame, prefix: str, prefixes: list[str]):
    """Rename the columns of ``df`` in place so each one carries a prefix.

    Each column name is passed through ``prefix_col``: names that already
    start with one of ``prefixes`` are kept, all others get ``prefix``
    prepended. ``df`` is mutated; nothing is returned.
    """
    df.columns = [prefix_col(col, prefix, prefixes) for col in df.columns]


def prefix_col(col: str, prefix: str, prefixes: list[str]):
    """Return ``col`` with ``prefix`` prepended unless it is already prefixed.

    Args:
        col: The column name to examine.
        prefix: The prefix to prepend when ``col`` has none.
        prefixes: All recognised prefixes. Any iterable of strings works,
            since it is converted to a tuple before use.

    Returns:
        ``col`` unchanged if it starts with any entry of ``prefixes``,
        otherwise ``prefix + col``.
    """
    # str.startswith accepts a tuple of candidates, replacing the manual loop.
    if col.startswith(tuple(prefixes)):
        return col
    return prefix + col
37 changes: 16 additions & 21 deletions summarize/summarize.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas as pd

from data.raw import RawData
from summarize.pages.artist import make_artist_summary
from summarize.pages.genre import make_genre_summary
from summarize.pages.label import make_label_summary
Expand All @@ -9,31 +10,25 @@
from utils.album import short_album_name
from utils.audio_features import set_tracks_full
from utils.date import release_year
from utils.path import clear_markdown, data_path
from utils.path import clear_markdown
from utils.record_label import standardize_record_labels
from utils.util import first, prefix_df
from utils.util import first


def summarize_results():
album_artist = pd.read_csv(data_path('album_artist'))
albums = pd.read_csv(data_path("albums"))
artists = pd.read_csv(data_path("artists"))
audio_features = pd.read_csv(data_path("audio_features"))
liked_tracks = pd.read_csv(data_path("liked_tracks"))
playlist_track = pd.read_csv(data_path("playlist_track"))
playlists = pd.read_csv(data_path("playlists"))
track_artist = pd.read_csv(data_path("track_artist"))
tracks = pd.read_csv(data_path("tracks"))
artist_genre = pd.read_csv(data_path("artist_genre"))
top_tracks = pd.read_csv(data_path("top_tracks"))
top_artists = pd.read_csv(data_path("top_artists"))

prefixes = ["album_", "track_", "playlist_", "artist_"]
prefix_df(albums, "album_", prefixes)
prefix_df(tracks, "track_", prefixes)
prefix_df(audio_features, "audio_", prefixes)
prefix_df(playlists, "playlist_", prefixes)
prefix_df(artists, "artist_", prefixes)
raw_data = RawData()
album_artist = raw_data['album_artist']
albums = raw_data["albums"]
artists = raw_data["artists"]
audio_features = raw_data["audio_features"]
liked_tracks = raw_data["liked_tracks"]
playlist_track = raw_data["playlist_track"]
playlists = raw_data["playlists"]
track_artist = raw_data["track_artist"]
tracks = raw_data["tracks"]
artist_genre = raw_data["artist_genre"]
top_tracks = raw_data["top_tracks"]
top_artists = raw_data["top_artists"]

albums['album_release_year'] = albums['album_release_date'].apply(release_year)
albums['album_short_name'] = albums['album_name'].apply(short_album_name)
Expand Down
8 changes: 0 additions & 8 deletions utils/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,8 @@ def file_name_friendly(text: str):
return re.sub(r"[^a-z0-9]", "_", text.lower())


def prefix_df(df: pd.DataFrame, prefix: str, prefixes: list[str]):
df.columns = [prefix_col(col, prefix, prefixes) for col in df.columns]


def prefix_col(col: str, prefix: str, prefixes: list[str]):
for other_prefix in prefixes:
if col.startswith(other_prefix):
return col
return prefix + col


def first(series: pd.Series):
    """Return the first element of ``series`` by position, or ``None`` if it is empty."""
    if len(series) == 0:
        return None
    return series.iloc[0]

0 comments on commit 39f6ff0

Please sign in to comment.