Skip to content

refactor: offload thumbnail rendering to multiple processes #953

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 22 additions & 12 deletions src/tagstudio/core/library/alchemy/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@
)
from tagstudio.core.library.alchemy.visitors import SQLBoolExpressionBuilder
from tagstudio.core.library.json.library import Library as JsonLibrary
from tagstudio.qt import cache_manager
from tagstudio.qt.translations import Translations
from tagstudio.qt.widgets.thumb_renderer import ThumbnailManager

if TYPE_CHECKING:
from sqlalchemy import Select
Expand Down Expand Up @@ -212,6 +214,8 @@ class Library:
folder: Folder | None
included_files: set[Path] = set()

thumbnail_manager: ThumbnailManager

SQL_FILENAME: str = "ts_library.sqlite"
JSON_FILENAME: str = "ts_library.json"

Expand All @@ -222,6 +226,7 @@ def close(self):
self.storage_path = None
self.folder = None
self.included_files = set()
self.thumbnail_manager.close()

def migrate_json_to_sqlite(self, json_lib: JsonLibrary):
"""Migrate JSON library data to the SQLite database."""
Expand Down Expand Up @@ -332,6 +337,7 @@ def tag_display_name(self, tag_id: int) -> str:
return tag.name

def open_library(self, library_dir: Path, storage_path: Path | None = None) -> LibraryStatus:
self.thumbnail_manager = ThumbnailManager(library_dir)
is_new: bool = True
if storage_path == ":memory:":
self.storage_path = storage_path
Expand Down Expand Up @@ -837,7 +843,12 @@ def remove_entries(self, entry_ids: list[int]) -> None:
entry_ids[i : i + MAX_SQL_VARIABLES]
for i in range(0, len(entry_ids), MAX_SQL_VARIABLES)
]:
session.query(Entry).where(Entry.id.in_(sub_list)).delete()
paths = session.scalars(
delete(Entry).where(Entry.id.in_(sub_list)).returning(Entry.path)
)
for path in paths:
cache_folder = self.thumbnail_manager.cache_folder
cache_manager.remove_from_cache(cache_folder, path)
session.commit()

def has_path_entry(self, path: Path) -> bool:
Expand Down Expand Up @@ -985,19 +996,18 @@ def update_entry_path(self, entry_id: int | Entry, path: Path) -> bool:
if isinstance(entry_id, Entry):
entry_id = entry_id.id

old_path = None
with Session(self.engine) as session:
update_stmt = (
update(Entry)
.where(
and_(
Entry.id == entry_id,
)
)
.values(path=path)
)

session.execute(update_stmt)
entry = session.scalar(select(Entry).where(Entry.id == entry_id))
if entry is None:
return False
old_path = entry.path
session.execute(update(Entry).where(Entry.id == entry_id).values(path=path))
session.commit()
if old_path is not None:
cache_folder = self.thumbnail_manager.cache_folder
cache_manager.remove_from_cache(cache_folder, old_path)

return True

def remove_tag(self, tag: Tag):
Expand Down
198 changes: 21 additions & 177 deletions src/tagstudio/qt/cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,189 +2,33 @@
# Licensed under the GPL-3.0 License.
# Created for TagStudio: https://github.com/CyanVoxel/TagStudio

import contextlib
import math
import typing
from datetime import datetime as dt
from pathlib import Path

import structlog
from PIL import Image

from tagstudio.core.constants import THUMB_CACHE_NAME, TS_FOLDER_NAME
from tagstudio.core.singleton import Singleton

# Only import for type checking/autocompletion, will not be imported at runtime.
if typing.TYPE_CHECKING:
from tagstudio.core.library import Library

logger = structlog.get_logger(__name__)


class CacheManager(metaclass=Singleton):
FOLDER_SIZE = 10000000 # Each cache folder assumed to be 10 MiB
size_limit = 500000000 # 500 MiB default

folder_dict: dict[Path, int] = {}

def __init__(self):
self.lib: Library | None = None
self.last_lib_path: Path | None = None

@staticmethod
def clear_cache(library_dir: Path | None) -> bool:
"""Clear all files and folders within the cached folder.

Returns:
bool: True if successfully deleted, else False.
"""
cleared = True

if library_dir:
tree: Path = library_dir / TS_FOLDER_NAME / THUMB_CACHE_NAME

for folder in tree.glob("*"):
for file in folder.glob("*"):
# NOTE: On macOS with non-native file systems, this will commonly raise
# FileNotFound errors due to trying to delete "._" files that have
# already been deleted: https://bugs.python.org/issue29699
with contextlib.suppress(FileNotFoundError):
file.unlink()
try:
folder.rmdir()
with contextlib.suppress(KeyError):
CacheManager.folder_dict.pop(folder)
except Exception as e:
logger.error(
"[CacheManager] Couldn't unlink empty cache folder!",
error=e,
folder=folder,
tree=tree,
)

for _ in tree.glob("*"):
cleared = False

if cleared:
logger.info("[CacheManager] Cleared cache!")
else:
logger.error("[CacheManager] Couldn't delete cache!", tree=tree)

return cleared

def set_library(self, library):
"""Set the TagStudio library for the cache manager."""
self.lib = library
self.last_lib_path = self.lib.library_dir
if library.library_dir:
self.check_folder_status()

def cache_dir(self) -> Path | None:
"""Return the current cache directory, not including folder slugs."""
if not self.lib.library_dir:
return None
return Path(self.lib.library_dir / TS_FOLDER_NAME / THUMB_CACHE_NAME)

def save_image(self, image: Image.Image, path: Path, mode: str = "RGBA"):
"""Save an image to the cache."""
folder = self.get_current_folder()
if folder:
image_path: Path = folder / path
image.save(image_path, mode=mode)
with contextlib.suppress(KeyError):
CacheManager.folder_dict[folder] += image_path.stat().st_size

def check_folder_status(self):
"""Check the status of the cache folders.

This includes registering existing ones and creating new ones if needed.
"""
if (
(self.last_lib_path != self.lib.library_dir)
or not self.cache_dir()
or not self.cache_dir().exists()
):
self.register_existing_folders()

def create_folder() -> Path | None:
"""Create a new cache folder."""
if not self.lib.library_dir:
return None
folder_path = Path(self.cache_dir() / str(math.floor(dt.timestamp(dt.now()))))
logger.info("[CacheManager] Creating new folder", folder=folder_path)
try:
folder_path.mkdir(exist_ok=True)
except NotADirectoryError:
logger.error("[CacheManager] Not a directory", path=folder_path)
return folder_path

# Get size of most recent folder, if any exist.
if CacheManager.folder_dict:
last_folder = sorted(CacheManager.folder_dict.keys())[-1]

if CacheManager.folder_dict[last_folder] > CacheManager.FOLDER_SIZE:
new_folder = create_folder()
CacheManager.folder_dict[new_folder] = 0
else:
new_folder = create_folder()
CacheManager.folder_dict[new_folder] = 0
import hashlib
from pathlib import Path

def get_current_folder(self) -> Path:
"""Get the current cache folder path that should be used."""
self.check_folder_status()
self.cull_folders()

return sorted(CacheManager.folder_dict.keys())[-1]
def _get_hash(file_path: Path) -> str:
return hashlib.shake_128(str(file_path).encode("utf-8")).hexdigest(8)

def register_existing_folders(self):
"""Scan and register any pre-existing cache folders with the most recent size."""
self.last_lib_path = self.lib.library_dir
CacheManager.folder_dict.clear()

if self.last_lib_path:
# Ensure thumbnail cache path exists.
self.cache_dir().mkdir(exist_ok=True)
# Registers any existing folders and counts the capacity of the most recent one.
for f in sorted(self.cache_dir().glob("*")):
if f.is_dir():
# A folder is found. Add it to the class dict.
CacheManager.folder_dict[f] = 0
CacheManager.folder_dict = dict(
sorted(CacheManager.folder_dict.items(), key=lambda kv: kv[0])
)
def get_cache_path(cache_folder: Path, file_path: Path) -> Path:
    """Build the location of the cached thumbnail for ``file_path``.

    The result has the form ``cache_folder/<xx>/<digest>-<mtime_ns>.webp``,
    where ``<digest>`` is the path digest from ``_get_hash``, ``<xx>`` is its
    first two characters, and ``<mtime_ns>`` is the file's modification time
    in nanoseconds.

    Args:
        cache_folder: Root folder of the thumbnail cache.
        file_path: File the thumbnail belongs to; it is ``stat``-ed to read
            its modification time.

    Returns:
        The path the thumbnail should be stored at. Nothing is created on disk.

    Raises:
        OSError: Propagated from ``file_path.stat()`` (e.g. the file is missing).
    """
    digest = _get_hash(file_path)
    modified_ns = file_path.stat().st_mtime_ns
    # The two-character prefix spreads cache files over subfolders.
    bucket = Path(digest[:2])
    filename = f"{digest}-{modified_ns}.webp"
    return cache_folder / bucket / filename

if CacheManager.folder_dict:
last_folder = sorted(CacheManager.folder_dict.keys())[-1]
for f in last_folder.glob("*"):
if not f.is_dir():
with contextlib.suppress(KeyError):
CacheManager.folder_dict[last_folder] += f.stat().st_size

def cull_folders(self):
"""Remove folders and their cached context based on size or age limits."""
# Ensure that the user's configured size limit isn't less than the internal folder size.
size_limit = max(CacheManager.size_limit, CacheManager.FOLDER_SIZE)
def remove_from_cache(cache_folder: Path, file_path: Path):
    """Delete every cached thumbnail stored for ``file_path``.

    Thumbnails are stored as ``<digest>-<mtime_ns>.webp`` inside a subfolder
    named after the first two characters of the digest, so one source path
    may have several cached files (one per modification time). All of them
    are removed.

    Args:
        cache_folder: Root folder of the thumbnail cache.
        file_path: Path of the file whose thumbnails should be dropped. Only
            its string form is hashed; the file itself does not need to exist.
    """
    digest = _get_hash(file_path)
    bucket = cache_folder / digest[:2]
    # The pattern has to be an f-string: a plain "{hash}-*.webp" literal is
    # matched verbatim by glob and never hits a real cache file.
    for cached in bucket.glob(f"{digest}-*.webp"):
        if cached.is_file():
            # missing_ok tolerates the file vanishing between glob and unlink.
            cached.unlink(missing_ok=True)

if len(CacheManager.folder_dict) > (size_limit / CacheManager.FOLDER_SIZE):
f = sorted(CacheManager.folder_dict.keys())[0]
folder = self.cache_dir() / f
logger.info("[CacheManager] Removing folder due to size limit", folder=folder)

for file in folder.glob("*"):
try:
file.unlink()
except Exception as e:
logger.error(
"[CacheManager] Couldn't cull file inside of folder!",
error=e,
file=file,
folder=folder,
)
try:
folder.rmdir()
with contextlib.suppress(KeyError):
CacheManager.folder_dict.pop(f)
self.cull_folders()
except Exception as e:
logger.error("[CacheManager] Couldn't cull folder!", error=e, folder=folder)
pass
def clear_cache(cache_folder: Path):
    """Remove everything stored directly inside the thumbnail cache folder.

    Each subfolder is emptied and then removed; stray files sitting at the
    top level are removed as well. ``cache_folder`` itself is kept.

    Args:
        cache_folder: Root folder of the thumbnail cache. If it does not
            exist (or is not a directory) the call is a no-op.

    Raises:
        OSError: If a subfolder cannot be removed, e.g. because it still
            contains a nested directory.
    """
    # Nothing to clear when the cache has never been created.
    if not cache_folder.is_dir():
        return

    for entry in cache_folder.iterdir():
        if entry.is_dir():
            for file in entry.iterdir():
                # missing_ok: on some file systems (e.g. macOS "._" companion
                # files) an entry can disappear before it is unlinked.
                file.unlink(missing_ok=True)
            entry.rmdir()
        else:
            # A plain file at the top level would make iterdir() raise.
            entry.unlink(missing_ok=True)
Loading