Skip to content

fix: refactor and fix bugs with missing_files.py #739

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 46 additions & 1 deletion tagstudio/src/core/library/alchemy/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,32 @@ def get_entries_full(self, entry_ids: list[int] | set[int]) -> Iterator[Entry]:
yield entry
session.expunge(entry)

def get_entry_full_by_path(self, path: Path) -> Entry | None:
"""Get the entry with the corresponding path."""
with Session(self.engine) as session:
stmt = select(Entry).where(Entry.path == path)
stmt = (
stmt.outerjoin(Entry.text_fields)
.outerjoin(Entry.datetime_fields)
.options(selectinload(Entry.text_fields), selectinload(Entry.datetime_fields))
)
stmt = (
stmt.outerjoin(Entry.tags)
.outerjoin(TagAlias)
.options(
selectinload(Entry.tags).options(
joinedload(Tag.aliases),
joinedload(Tag.parent_tags),
)
)
)
entry = session.scalar(stmt)
if not entry:
return None
session.expunge(entry)
make_transient(entry)
return entry

@property
def entries_count(self) -> int:
with Session(self.engine) as session:
Expand Down Expand Up @@ -698,7 +724,13 @@ def search_tags(

return res

def update_entry_path(self, entry_id: int | Entry, path: Path) -> None:
def update_entry_path(self, entry_id: int | Entry, path: Path) -> bool:
"""Set the path field of an entry.

Returns True if the action succeeded and False if the path already exists.
"""
if self.has_path_entry(path):
return False
if isinstance(entry_id, Entry):
entry_id = entry_id.id

Expand All @@ -715,6 +747,7 @@ def update_entry_path(self, entry_id: int | Entry, path: Path) -> None:

session.execute(update_stmt)
session.commit()
return True

def remove_tag(self, tag: Tag):
with Session(self.engine, expire_on_commit=False) as session:
Expand Down Expand Up @@ -1185,6 +1218,18 @@ def mirror_entry_fields(self, *entries: Entry) -> None:
value=field.value,
)

def merge_entries(self, from_entry: Entry, into_entry: Entry) -> None:
"""Add fields and tags from the first entry to the second, and then delete the first."""
for field in from_entry.fields:
self.add_field_to_entry(
entry_id=into_entry.id,
field_id=field.type_key,
value=field.value,
)
tag_ids = [tag.id for tag in from_entry.tags]
self.add_tags_to_entry(into_entry.id, tag_ids)
self.remove_entries([from_entry.id])

@property
def tag_color_groups(self) -> dict[str, list[TagColorGroup]]:
"""Return every TagColorGroup in the library."""
Expand Down
73 changes: 47 additions & 26 deletions tagstudio/src/core/utils/missing_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@

import structlog
from src.core.library import Entry, Library

IGNORE_ITEMS = [
"$recycle.bin",
]
from src.core.utils.refresh_dir import GLOBAL_IGNORE_SET

logger = structlog.get_logger()

Expand All @@ -18,49 +15,73 @@ class MissingRegistry:

library: Library
files_fixed_count: int = 0
missing_files: list[Entry] = field(default_factory=list)
missing_file_entries: list[Entry] = field(default_factory=list)

@property
def missing_files_count(self) -> int:
return len(self.missing_files)
def missing_file_entries_count(self) -> int:
return len(self.missing_file_entries)

def refresh_missing_files(self) -> Iterator[int]:
"""Track the number of Entries that point to an invalid file path."""
logger.info("refresh_missing_files running")
self.missing_files = []
"""Track the number of entries that point to an invalid filepath."""
logger.info("[refresh_missing_files] Refreshing missing files...")
self.missing_file_entries = []
for i, entry in enumerate(self.library.get_entries()):
full_path = self.library.library_dir / entry.path
if not full_path.exists() or not full_path.is_file():
self.missing_files.append(entry)
self.missing_file_entries.append(entry)
yield i

def match_missing_file(self, match_item: Entry) -> list[Path]:
"""Try to find missing entry files within the library directory.
def match_missing_file_entry(self, match_entry: Entry) -> list[Path]:
"""Try and match unlinked file entries with matching results in the library directory.

Works if files were just moved to different subfolders and don't have duplicate names.
"""
matches = []
for item in self.library.library_dir.glob(f"**/{match_item.path.name}"):
if item.name == match_item.path.name: # TODO - implement IGNORE_ITEMS
new_path = Path(item).relative_to(self.library.library_dir)
for path in self.library.library_dir.glob(f"**/{match_entry.path.name}"):
# Ensure matched file isn't in a globally ignored folder
skip: bool = False
for part in path.parts:
if part in GLOBAL_IGNORE_SET:
skip = True
break
if skip:
continue
if path.name == match_entry.path.name:
new_path = Path(path).relative_to(self.library.library_dir)
matches.append(new_path)

logger.info("[MissingRegistry] Matches", matches=matches)
return matches

def fix_missing_files(self) -> Iterator[int]:
"""Attempt to fix missing files by finding a match in the library directory."""
def fix_unlinked_entries(self) -> Iterator[int]:
"""Attempt to fix unlinked file entries by finding a match in the library directory."""
self.files_fixed_count = 0
for i, entry in enumerate(self.missing_files, start=1):
item_matches = self.match_missing_file(entry)
matched_entries: list[Entry] = []
for i, entry in enumerate(self.missing_file_entries):
item_matches = self.match_missing_file_entry(entry)
if len(item_matches) == 1:
logger.info("fix_missing_files", entry=entry, item_matches=item_matches)
self.library.update_entry_path(entry.id, item_matches[0])
logger.info(
"[fix_unlinked_entries]",
entry=entry.path.as_posix(),
item_matches=item_matches[0].as_posix(),
)
if not self.library.update_entry_path(entry.id, item_matches[0]):
try:
match = self.library.get_entry_full_by_path(item_matches[0])
entry_full = self.library.get_entry_full(entry.id)
self.library.merge_entries(entry_full, match)
except AttributeError:
continue
self.files_fixed_count += 1
# remove fixed file
self.missing_files.remove(entry)
matched_entries.append(entry)
yield i

for entry in matched_entries:
self.missing_file_entries.remove(entry)

def execute_deletion(self) -> None:
self.library.remove_entries(list(map(lambda missing: missing.id, self.missing_files)))
self.library.remove_entries(
list(map(lambda missing: missing.id, self.missing_file_entries))
)

self.missing_files = []
self.missing_file_entries = []
8 changes: 4 additions & 4 deletions tagstudio/src/qt/modals/delete_unlinked.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(self, driver: "QtDriver", tracker: MissingRegistry):
Translations.translate_qobject(
self.desc_widget,
"entries.unlinked.delete.confirm",
count=self.tracker.missing_files_count,
count=self.tracker.missing_file_entries_count,
)
self.desc_widget.setAlignment(Qt.AlignmentFlag.AlignCenter)

Expand Down Expand Up @@ -75,12 +75,12 @@ def __init__(self, driver: "QtDriver", tracker: MissingRegistry):
def refresh_list(self):
self.desc_widget.setText(
Translations.translate_formatted(
"entries.unlinked.delete.confirm", count=self.tracker.missing_files_count
"entries.unlinked.delete.confirm", count=self.tracker.missing_file_entries_count
)
)

self.model.clear()
for i in self.tracker.missing_files:
for i in self.tracker.missing_file_entries:
item = QStandardItem(str(i.path))
item.setEditable(False)
self.model.appendRow(item)
Expand All @@ -90,7 +90,7 @@ def displayed_text(x):
return Translations.translate_formatted(
"entries.unlinked.delete.deleting_count",
idx=x,
count=self.tracker.missing_files_count,
count=self.tracker.missing_file_entries_count,
)

pw = ProgressWidget(
Expand Down
2 changes: 1 addition & 1 deletion tagstudio/src/qt/modals/fix_unlinked.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def set_missing_count(self, count: int | None = None):
if count is not None:
self.missing_count = count
else:
self.missing_count = self.tracker.missing_files_count
self.missing_count = self.tracker.missing_file_entries_count

if self.missing_count < 0:
self.search_button.setDisabled(True)
Expand Down
6 changes: 3 additions & 3 deletions tagstudio/src/qt/modals/relink_unlinked.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@ def displayed_text(x):
return Translations.translate_formatted(
"entries.unlinked.relink.attempting",
idx=x,
missing_count=self.tracker.missing_files_count,
missing_count=self.tracker.missing_file_entries_count,
fixed_count=self.tracker.files_fixed_count,
)

pw = ProgressWidget(
label_text="",
cancel_button_text=None,
minimum=0,
maximum=self.tracker.missing_files_count,
maximum=self.tracker.missing_file_entries_count,
)
Translations.translate_with_setter(pw.setWindowTitle, "entries.unlinked.relink.title")

pw.from_iterable_function(self.tracker.fix_missing_files, displayed_text, self.done.emit)
pw.from_iterable_function(self.tracker.fix_unlinked_entries, displayed_text, self.done.emit)
1 change: 1 addition & 0 deletions tagstudio/src/qt/widgets/preview/preview_thumb.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ def _update_image(self, filepath: Path, ext: str) -> dict:
except (
rawpy._rawpy.LibRawIOError,
rawpy._rawpy.LibRawFileUnsupportedError,
FileNotFoundError,
):
pass
elif MediaCategories.is_ext_in_category(
Expand Down
5 changes: 3 additions & 2 deletions tagstudio/tests/macros/test_missing_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
CWD = pathlib.Path(__file__).parent


# NOTE: Does this test actually work?
@pytest.mark.parametrize("library", [TemporaryDirectory()], indirect=True)
def test_refresh_missing_files(library: Library):
registry = MissingRegistry(library=library)
Expand All @@ -20,10 +21,10 @@ def test_refresh_missing_files(library: Library):
assert list(registry.refresh_missing_files()) == [0, 1]

# neither of the library entries exist
assert len(registry.missing_files) == 2
assert len(registry.missing_file_entries) == 2

# iterate through two files
assert list(registry.fix_missing_files()) == [1, 2]
assert list(registry.fix_unlinked_entries()) == [0, 1]

# `bar.md` should be relinked to new correct path
results = library.search_library(FilterState.from_path("bar.md"))
Expand Down
35 changes: 35 additions & 0 deletions tagstudio/tests/test_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,41 @@ def test_mirror_entry_fields(library: Library, entry_full):
}


def test_merge_entries(library: Library):
a = Entry(
folder=library.folder,
path=Path("a"),
fields=[
TextField(type_key=_FieldID.AUTHOR.name, value="Author McAuthorson", position=0),
TextField(type_key=_FieldID.DESCRIPTION.name, value="test description", position=2),
],
)
b = Entry(
folder=library.folder,
path=Path("b"),
fields=[TextField(type_key=_FieldID.NOTES.name, value="test note", position=1)],
)
try:
ids = library.add_entries([a, b])
entry_a = library.get_entry_full(ids[0])
entry_b = library.get_entry_full(ids[1])
tag_0 = library.add_tag(Tag(id=1000, name="tag_0"))
tag_1 = library.add_tag(Tag(id=1001, name="tag_1"))
tag_2 = library.add_tag(Tag(id=1002, name="tag_2"))
library.add_tags_to_entry(ids[0], [tag_0.id, tag_2.id])
library.add_tags_to_entry(ids[1], [tag_1.id])
library.merge_entries(entry_a, entry_b)
assert library.has_path_entry(Path("b"))
assert not library.has_path_entry(Path("a"))
fields = [field.value for field in entry_a.fields]
assert "Author McAuthorson" in fields
assert "test description" in fields
assert "test note" in fields
assert b.has_tag(tag_0) and b.has_tag(tag_1) and b.has_tag(tag_2)
except AttributeError:
AssertionError()


def test_remove_tag_from_entry(library, entry_full):
removed_tag_id = -1
for tag in entry_full.tags:
Expand Down
Loading