Skip to content

Clean up the refresh_dir code, update tests #494

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/mypy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ jobs:

- name: Install dependencies
run: |
pip install -r requirements.txt
pip install mypy==1.11.2
python -m pip install --upgrade uv
uv pip install --system -r requirements.txt
uv pip install --system mypy==1.11.2
mkdir tagstudio/.mypy_cache

- uses: tsuyoshicho/action-mypy@v4
Expand Down
13 changes: 0 additions & 13 deletions tagstudio/src/core/library/alchemy/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
selectinload,
make_transient,
)
from typing import TYPE_CHECKING

from .db import make_tables
from .enums import TagColor, FilterState, FieldTypeEnum
Expand All @@ -46,10 +45,6 @@
BACKUP_FOLDER_NAME,
)

if TYPE_CHECKING:
from ...utils.dupe_files import DupeRegistry
from ...utils.missing_files import MissingRegistry

LIBRARY_FILENAME: str = "ts_library.sqlite"

logger = structlog.get_logger(__name__)
Expand Down Expand Up @@ -100,11 +95,6 @@ class Library:
engine: Engine | None
folder: Folder | None

ignored_extensions: list[str]

missing_tracker: "MissingRegistry"
dupe_tracker: "DupeRegistry"

def close(self):
if self.engine:
self.engine.dispose()
Expand Down Expand Up @@ -182,9 +172,6 @@ def open_library(
session.commit()
self.folder = folder

# load ignored extensions
self.ignored_extensions = self.prefs(LibraryPrefs.EXTENSION_LIST)

@property
def default_fields(self) -> list[BaseField]:
with Session(self.engine) as session:
Expand Down
47 changes: 26 additions & 21 deletions tagstudio/src/core/utils/refresh_dir.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
import time
from time import time
from collections.abc import Iterator
from dataclasses import dataclass, field
from pathlib import Path

import structlog

from src.core.constants import TS_FOLDER_NAME
from src.core.library import Library, Entry

logger = structlog.get_logger(__name__)


@dataclass
class RefreshDirTracker:
library: Library
dir_file_count: int = 0
files_not_in_library: list[Path] = field(default_factory=list)

@property
Expand All @@ -36,38 +39,40 @@ def save_new_files(self) -> Iterator[int]:

self.files_not_in_library = []

def refresh_dir(self) -> Iterator[int]:
def refresh_dir(self, lib_path: Path) -> Iterator[int]:
"""Recursively scan a directory and collect the relative paths of files that are not yet in the library."""
if self.library.folder is None:
raise ValueError("No folder set.")
if self.library.library_dir is None:
raise ValueError("No library directory set.")

start_time = time.time()
self.files_not_in_library = []
self.dir_file_count = 0
start_time_total = time()
start_time_loop = time()

lib_path = self.library.folder.path
self.files_not_in_library = []
dir_file_count = 0

for path in lib_path.glob("**/*"):
str_path = str(path)
if (
path.is_dir()
or "$RECYCLE.BIN" in str_path
or TS_FOLDER_NAME in str_path
or "tagstudio_thumbs" in str_path
):
if path.is_dir():
continue

suffix = path.suffix.lower().lstrip(".")
if suffix in self.library.ignored_extensions:
if "$RECYCLE.BIN" in str_path or TS_FOLDER_NAME in str_path:
continue

self.dir_file_count += 1
dir_file_count += 1
relative_path = path.relative_to(lib_path)
# TODO: check these paths in a single batch instead of querying the library once per file
if not self.library.has_path_entry(relative_path):
self.files_not_in_library.append(relative_path)

end_time = time.time()
# Yield output every 1/30 of a second
if (end_time - start_time) > 0.034:
yield self.dir_file_count
if (time() - start_time_loop) > 0.034:
yield dir_file_count
start_time_loop = time()

end_time_total = time()
logger.info(
"Directory scan time",
path=lib_path,
duration=(end_time_total - start_time_total),
new_files_count=dir_file_count,
)
2 changes: 1 addition & 1 deletion tagstudio/src/qt/ts_qt.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,7 @@ def add_new_files_callback(self):
)
pw.show()

iterator = FunctionIterator(tracker.refresh_dir)
iterator = FunctionIterator(lambda: tracker.refresh_dir(self.lib.library_dir))
iterator.value.connect(
lambda x: (
pw.update_progress(x + 1),
Expand Down
2 changes: 1 addition & 1 deletion tagstudio/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def cwd():
@pytest.fixture
def library(request):
# when no param is passed, use the default
library_path = "/tmp/"
library_path = "/dev/null/"
if hasattr(request, "param"):
if isinstance(request.param, TemporaryDirectory):
library_path = request.param.name
Expand Down
1 change: 1 addition & 0 deletions tagstudio/tests/macros/test_dupe_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@


def test_refresh_dupe_files(library):
library.library_dir = "/tmp/"
entry = Entry(
folder=library.folder,
path=pathlib.Path("bar/foo.txt"),
Expand Down
18 changes: 12 additions & 6 deletions tagstudio/tests/macros/test_refresh_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,24 @@
from tempfile import TemporaryDirectory

import pytest

from src.core.constants import LibraryPrefs
from src.core.utils.refresh_dir import RefreshDirTracker

CWD = pathlib.Path(__file__).parent


@pytest.mark.parametrize("exclude_mode", [True, False])
@pytest.mark.parametrize("library", [TemporaryDirectory()], indirect=True)
def test_refresh_new_files(library):
def test_refresh_new_files(library, exclude_mode):
# Given
library.set_prefs(LibraryPrefs.IS_EXCLUDE_LIST, exclude_mode)
library.set_prefs(LibraryPrefs.EXTENSION_LIST, [".md"])
registry = RefreshDirTracker(library=library)
(library.library_dir / "FOO.MD").touch()

# create an empty file to simulate a newly added file
(library.library_dir / "foo.md").touch()

assert not list(registry.refresh_dir())
# When
assert not list(registry.refresh_dir(library.library_dir))

assert registry.files_not_in_library == [pathlib.Path("foo.md")]
# Then
assert registry.files_not_in_library == [pathlib.Path("FOO.MD")]
3 changes: 3 additions & 0 deletions tagstudio/tests/qt/test_preview_panel.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from pathlib import Path
from tempfile import TemporaryDirectory

import pytest

from src.core.library import Entry
from src.core.library.alchemy.enums import FieldTypeEnum
Expand All @@ -18,6 +20,7 @@ def test_update_widgets_not_selected(qt_driver, library):
assert panel.file_label.text() == "No Items Selected"


@pytest.mark.parametrize("library", [TemporaryDirectory()], indirect=True)
def test_update_widgets_single_selected(qt_driver, library):
qt_driver.frame_content = list(library.get_entries())
qt_driver.selected = [0]
Expand Down