diff --git a/data/__init__.py b/data/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/data/provider.py b/data/provider.py
new file mode 100644
--- /dev/null
+++ b/data/provider.py
@@ -0,0 +1,11 @@
+class DataProvider:
+    """Singleton: every ``DataProvider()`` call returns the same object."""
+
+    _instance = None
+
+    def __new__(cls):
+        """Return the shared instance, creating it on first use."""
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+
+        return cls._instance
diff --git a/data/raw.py b/data/raw.py
new file mode 100644
--- /dev/null
+++ b/data/raw.py
@@ -0,0 +1,91 @@
+from collections.abc import Iterable
+
+import pandas as pd
+
+from utils.path import data_path
+
+
+valid_data_sources = {
+    'album_artist',
+    'albums',
+    'artists',
+    'audio_features',
+    'liked_tracks',
+    'playlist_track',
+    'playlists',
+    'track_artist',
+    'tracks',
+    'artist_genre',
+    'top_tracks',
+    'top_artists'
+}
+
+df_prefixes = {
+    "albums": "album_",
+    "tracks": "track_",
+    "audio_features": "audio_",
+    "playlists": "playlist_",
+    "artists": "artist_"
+}
+
+
+class RawData:
+    """Singleton cache of the raw CSV tables, loaded lazily by name."""
+
+    _instance = None
+
+    def __new__(cls):
+        """Return the shared instance, creating it on first use."""
+        if cls._instance is None:
+            instance = super().__new__(cls)
+            # Create the cache here rather than in __init__: Python runs
+            # __init__ on every RawData() call, so assigning it there
+            # would throw away every frame loaded so far.
+            instance._data = {}
+            cls._instance = instance
+
+        return cls._instance
+
+    def __getitem__(self, key) -> pd.DataFrame:
+        """Return the DataFrame for ``key``, reading its CSV on first access.
+
+        Later accesses return the same DataFrame object, so in-place
+        changes made by one caller are seen by every other caller.
+
+        Raises:
+            RuntimeError: if ``key`` is not in ``valid_data_sources``.
+        """
+        if key not in valid_data_sources:
+            raise RuntimeError(f'Invalid data source {key}')
+
+        if key not in self._data:
+            df = pd.read_csv(data_path(key))
+            prefix = df_prefixes.get(key)
+            if prefix is not None:
+                # NOTE(review): summarize_results used to pass only the
+                # album_, track_, playlist_ and artist_ prefixes; "audio_"
+                # is new here. Confirm that is intended.
+                prefix_df(df, prefix, df_prefixes.values())
+
+            self._data[key] = df
+
+        return self._data[key]
+
+
+def prefix_df(df: pd.DataFrame, prefix: str, prefixes: Iterable[str]) -> None:
+    """Rename the columns of ``df`` in place using ``prefix_col``."""
+    # Materialise once so a one-shot iterable is not exhausted by the
+    # first column.
+    known = tuple(prefixes)
+    df.columns = [prefix_col(col, prefix, known) for col in df.columns]
+
+
+def prefix_col(col: str, prefix: str, prefixes: Iterable[str]) -> str:
+    """Return ``col`` as is if it starts with any of ``prefixes``.
+
+    Otherwise return ``prefix + col``.
+    """
+    # str.startswith accepts a tuple and checks every candidate at once.
+    if col.startswith(tuple(prefixes)):
+        return col
+    return prefix + col
diff --git a/summarize/summarize.py b/summarize/summarize.py
index ecb061f7..9e573275 100644
--- a/summarize/summarize.py
+++ b/summarize/summarize.py
@@ -1,5 +1,6 @@
 import pandas as pd
 
+from data.raw import RawData
 from summarize.pages.artist import make_artist_summary
 from summarize.pages.genre import make_genre_summary
 from summarize.pages.label import make_label_summary
@@ -9,31 +10,25 @@
 from utils.album import short_album_name
 from utils.audio_features import set_tracks_full
 from utils.date import release_year
-from utils.path import clear_markdown, data_path
+from utils.path import clear_markdown
 from utils.record_label import standardize_record_labels
-from utils.util import first, prefix_df
+from utils.util import first
 
 
 def summarize_results():
-    album_artist = pd.read_csv(data_path('album_artist'))
-    albums = pd.read_csv(data_path("albums"))
-    artists = pd.read_csv(data_path("artists"))
-    audio_features = pd.read_csv(data_path("audio_features"))
-    liked_tracks = pd.read_csv(data_path("liked_tracks"))
-    playlist_track = pd.read_csv(data_path("playlist_track"))
-    playlists = pd.read_csv(data_path("playlists"))
-    track_artist = pd.read_csv(data_path("track_artist"))
-    tracks = pd.read_csv(data_path("tracks"))
-    artist_genre = pd.read_csv(data_path("artist_genre"))
-    top_tracks = pd.read_csv(data_path("top_tracks"))
-    top_artists = pd.read_csv(data_path("top_artists"))
-
-    prefixes = ["album_", "track_", "playlist_", "artist_"]
-    prefix_df(albums, "album_", prefixes)
-    prefix_df(tracks, "track_", prefixes)
-    prefix_df(audio_features, "audio_", prefixes)
-    prefix_df(playlists, "playlist_", prefixes)
-    prefix_df(artists, "artist_", prefixes)
+    raw_data = RawData()
+    album_artist = raw_data['album_artist']
+    albums = raw_data["albums"]
+    artists = raw_data["artists"]
+    audio_features = raw_data["audio_features"]
+    liked_tracks = raw_data["liked_tracks"]
+    playlist_track = raw_data["playlist_track"]
+    playlists = raw_data["playlists"]
+    track_artist = raw_data["track_artist"]
+    tracks = raw_data["tracks"]
+    artist_genre = raw_data["artist_genre"]
+    top_tracks = raw_data["top_tracks"]
+    top_artists = raw_data["top_artists"]
 
     albums['album_release_year'] = albums['album_release_date'].apply(release_year)
     albums['album_short_name'] = albums['album_name'].apply(short_album_name)
diff --git a/utils/util.py b/utils/util.py
--- a/utils/util.py
+++ b/utils/util.py
@@ -20,16 +20,5 @@ def file_name_friendly(text: str):
     return re.sub(r"[^a-z0-9]", "_", text.lower())
 
 
-def prefix_df(df: pd.DataFrame, prefix: str, prefixes: list[str]):
-    df.columns = [prefix_col(col, prefix, prefixes) for col in df.columns]
-
-
-def prefix_col(col: str, prefix: str, prefixes: list[str]):
-    for other_prefix in prefixes:
-        if col.startswith(other_prefix):
-            return col
-    return prefix + col
-
-
 def first(series: pd.Series):
     return None if len(series) == 0 else series.iloc[0]