Skip to content

Commit ef68603

Browse files
authored
feat(parity): migrate json libraries to sqlite (#604)
* feat(ui): add PagedPanel widget * feat(ui): add MigrationModal widget * feat: add basic json to sql conversion * fix: chose `poolclass` based on file or memory db * feat: migrate tag colors from json to sql * feat: migrate entry fields from json to sql - fix: tag name column no longer has unique constraint - fix: tags are referenced by id in db queries - fix: tag_search_panel no longer queries db on initialization; does not regress to empty search window when shown - fix: tag name search no longer uses library grid FilterState object - fix: tag name search now respects tag limit * set default `is_new` case * fix: limit correct tag query * feat: migrate tag aliases and subtags from json to sql * add migration timer * fix(tests): fix broken tests * rename methods, add docstrings * revert tag id search, split tag name search * fix: use correct type in sidecar macro * tests: add json migration tests * fix: drop leading dot from json extensions * add special characters to json db test * tests: add file path and entry field parity checks * fix(ui): tag manager no longer starts empty * fix: read old windows paths as posix Addresses #298 * tests: add posix + windows paths to json library * tests: add subtag, alias, and shorthand parity tests * tests: ensure no none values in parity checks * tests: add tag color test, use tag id in tag tests * tests: fix and optimize tests * tests: add discrepancy tracker * refactor: reduce duplicate UI code * fix: load non-sequential entry ids * fix(ui): sort tags in the preview panel * tests(fix): prioritize `None` check over equality * fix(tests): fix multi "same tag field type" tests * ui: increase height of migration modal * feat: add progress bar to migration ui * fix(ui): sql values update earlier * refactor: use `get_color_from_str` in test * refactor: migrate tags before aliases and subtags * remove unused assertion * refactor: use `json_migration_req` flag
1 parent b7e652a commit ef68603

File tree

17 files changed

+1244
-80
lines changed

17 files changed

+1244
-80
lines changed

.gitignore

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ coverage.xml
5555
.hypothesis/
5656
.pytest_cache/
5757
cover/
58-
tagstudio/tests/fixtures/library/*
5958

6059
# Translations
6160
*.mo
@@ -255,11 +254,14 @@ compile_commands.json
255254
# Ignore all local history of files
256255
.history
257256
.ionide
257+
# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,python,qt
258258

259259
# TagStudio
260260
.TagStudio
261+
!*/tests/**/.TagStudio
262+
tagstudio/tests/fixtures/library/*
263+
tagstudio/tests/fixtures/json_library/.TagStudio/*.sqlite
261264
TagStudio.ini
262-
# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,python,qt
263265

264266
.envrc
265267
.direnv

tagstudio/src/core/library/alchemy/enums.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,13 @@ class TagColor(enum.IntEnum):
4242
COOL_GRAY = 36
4343
OLIVE = 37
4444

45+
@staticmethod
def get_color_from_str(color_name: str) -> "TagColor":
    """Return the TagColor whose member name matches ``color_name``.

    The name is upper-cased and its spaces are replaced with underscores
    before it is compared against each member name.

    Args:
        color_name: Color name to look up, e.g. ``"cool gray"``.

    Returns:
        The matching member, or ``TagColor.DEFAULT`` if no member matches.
    """
    # Normalize once instead of once per enum member.
    normalized = color_name.upper().replace(" ", "_")
    for color in TagColor:
        if color.name == normalized:
            return color
    return TagColor.DEFAULT
51+
4552

4653
class SearchMode(enum.IntEnum):
4754
"""Operational modes for item searching."""

tagstudio/src/core/library/alchemy/library.py

Lines changed: 171 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import re
22
import shutil
3+
import time
34
import unicodedata
45
from dataclasses import dataclass
56
from datetime import UTC, datetime
@@ -9,9 +10,11 @@
910
from uuid import uuid4
1011

1112
import structlog
13+
from humanfriendly import format_timespan
1214
from sqlalchemy import (
1315
URL,
1416
Engine,
17+
NullPool,
1518
and_,
1619
create_engine,
1720
delete,
@@ -29,6 +32,7 @@
2932
make_transient,
3033
selectinload,
3134
)
35+
from src.core.library.json.library import Library as JsonLibrary # type: ignore
3236

3337
from ...constants import (
3438
BACKUP_FOLDER_NAME,
@@ -122,6 +126,7 @@ class LibraryStatus:
122126
success: bool
123127
library_path: Path | None = None
124128
message: str | None = None
129+
json_migration_req: bool = False
125130

126131

127132
class Library:
@@ -133,7 +138,8 @@ class Library:
133138
folder: Folder | None
134139
included_files: set[Path] = set()
135140

136-
FILENAME: str = "ts_library.sqlite"
141+
SQL_FILENAME: str = "ts_library.sqlite"
142+
JSON_FILENAME: str = "ts_library.json"
137143

138144
def close(self):
139145
if self.engine:
@@ -143,32 +149,119 @@ def close(self):
143149
self.folder = None
144150
self.included_files = set()
145151

152+
def migrate_json_to_sqlite(self, json_lib: JsonLibrary):
    """Migrate JSON library data to the SQLite database.

    Data is written in this order: tags, tag aliases, tag subtags, entries
    (created without fields), entry fields, then library preferences.
    Tags are all added before aliases and subtags are attached to them.

    Args:
        json_lib: The loaded JSON library to read tags, entries and
            preferences from.
    """
    logger.info("Starting Library Conversion...")
    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    folder: Folder = Folder(path=self.library_dir, uuid=str(uuid4()))

    # Tags
    for tag in json_lib.tags:
        self.add_tag(
            Tag(
                id=tag.id,
                name=tag.name,
                shorthand=tag.shorthand,
                color=TagColor.get_color_from_str(tag.color),
            )
        )

    # Tag Aliases
    for tag in json_lib.tags:
        for alias in tag.aliases:
            self.add_alias(name=alias, tag_id=tag.id)

    # Tag Subtags
    for tag in json_lib.tags:
        for subtag_id in tag.subtag_ids:
            self.add_subtag(parent_id=tag.id, child_id=subtag_id)

    # Entries
    self.add_entries(
        [
            Entry(
                path=entry.path / entry.filename,
                folder=folder,
                fields=[],
                id=entry.id + 1,  # JSON IDs start at 0 instead of 1
            )
            for entry in json_lib.entries
        ]
    )
    # Fields are attached after all entries have been added.
    for entry in json_lib.entries:
        for field in entry.fields:
            for k, v in field.items():
                self.add_entry_field_type(
                    entry_ids=(entry.id + 1),  # JSON IDs start at 0 instead of 1
                    field_id=self.get_field_name_from_id(k),
                    value=v,
                )

    # Preferences
    # Extensions are stored without surrounding dots.
    self.set_prefs(LibraryPrefs.EXTENSION_LIST, [x.strip(".") for x in json_lib.ext_list])
    self.set_prefs(LibraryPrefs.IS_EXCLUDE_LIST, json_lib.is_exclude_list)

    elapsed = time.perf_counter() - start_time
    logger.info(f"Library Converted! ({format_timespan(elapsed)})")
206+
207+
def get_field_name_from_id(self, field_id: int) -> _FieldID | None:
    """Return the ``_FieldID`` member whose value carries the given ID.

    Args:
        field_id: ID to compare against each member's ``value.id``.

    Returns:
        The first matching member, or ``None`` if no member has that ID.
    """
    return next((f for f in _FieldID if field_id == f.value.id), None)
212+
146213
def open_library(self, library_dir: Path, storage_path: str | None = None) -> LibraryStatus:
214+
is_new: bool = True
147215
if storage_path == ":memory:":
148216
self.storage_path = storage_path
149217
is_new = True
218+
return self.open_sqlite_library(library_dir, is_new)
150219
else:
151-
self.verify_ts_folders(library_dir)
152-
self.storage_path = library_dir / TS_FOLDER_NAME / self.FILENAME
153-
is_new = not self.storage_path.exists()
220+
self.storage_path = library_dir / TS_FOLDER_NAME / self.SQL_FILENAME
221+
222+
if self.verify_ts_folder(library_dir) and (is_new := not self.storage_path.exists()):
223+
json_path = library_dir / TS_FOLDER_NAME / self.JSON_FILENAME
224+
if json_path.exists():
225+
return LibraryStatus(
226+
success=False,
227+
library_path=library_dir,
228+
message="[JSON] Legacy v9.4 library requires conversion to v9.5+",
229+
json_migration_req=True,
230+
)
231+
232+
return self.open_sqlite_library(library_dir, is_new)
154233

234+
def open_sqlite_library(
235+
self, library_dir: Path, is_new: bool, add_default_data: bool = True
236+
) -> LibraryStatus:
155237
connection_string = URL.create(
156238
drivername="sqlite",
157239
database=str(self.storage_path),
158240
)
241+
# NOTE: File-based databases should use NullPool to create new DB connection in order to
242+
# keep connections on separate threads, which prevents the DB files from being locked
243+
# even after a connection has been closed.
244+
# SingletonThreadPool (the default for :memory:) should still be used for in-memory DBs.
245+
# More info can be found on the SQLAlchemy docs:
246+
# https://docs.sqlalchemy.org/en/20/changelog/migration_07.html
247+
# Under -> sqlite-the-sqlite-dialect-now-uses-nullpool-for-file-based-databases
248+
poolclass = None if self.storage_path == ":memory:" else NullPool
159249

160-
logger.info("opening library", library_dir=library_dir, connection_string=connection_string)
161-
self.engine = create_engine(connection_string)
250+
logger.info(
251+
"Opening SQLite Library", library_dir=library_dir, connection_string=connection_string
252+
)
253+
self.engine = create_engine(connection_string, poolclass=poolclass)
162254
with Session(self.engine) as session:
163255
make_tables(self.engine)
164256

165-
tags = get_default_tags()
166-
try:
167-
session.add_all(tags)
168-
session.commit()
169-
except IntegrityError:
170-
# default tags may exist already
171-
session.rollback()
257+
if add_default_data:
258+
tags = get_default_tags()
259+
try:
260+
session.add_all(tags)
261+
session.commit()
262+
except IntegrityError:
263+
# default tags may exist already
264+
session.rollback()
172265

173266
# dont check db version when creating new library
174267
if not is_new:
@@ -219,7 +312,6 @@ def open_library(self, library_dir: Path, storage_path: str | None = None) -> Li
219312
db_version=db_version.value,
220313
expected=LibraryPrefs.DB_VERSION.default,
221314
)
222-
# TODO - handle migration
223315
return LibraryStatus(
224316
success=False,
225317
message=(
@@ -354,8 +446,12 @@ def tags(self) -> list[Tag]:
354446

355447
return list(tags_list)
356448

357-
def verify_ts_folders(self, library_dir: Path) -> None:
358-
"""Verify/create folders required by TagStudio."""
449+
def verify_ts_folder(self, library_dir: Path) -> bool:
450+
"""Verify/create folders required by TagStudio.
451+
452+
Returns:
453+
bool: True if path exists, False if it needed to be created.
454+
"""
359455
if library_dir is None:
360456
raise ValueError("No path set.")
361457

@@ -366,6 +462,8 @@ def verify_ts_folders(self, library_dir: Path) -> None:
366462
if not full_ts_path.exists():
367463
logger.info("creating library directory", dir=full_ts_path)
368464
full_ts_path.mkdir(parents=True, exist_ok=True)
465+
return False
466+
return True
369467

370468
def add_entries(self, items: list[Entry]) -> list[int]:
371469
"""Add multiple Entry records to the Library."""
@@ -507,21 +605,23 @@ def search_library(
507605

508606
def search_tags(
509607
self,
510-
search: FilterState,
608+
name: str,
511609
) -> list[Tag]:
512610
"""Return a list of Tag records matching the query."""
611+
tag_limit = 100
612+
513613
with Session(self.engine) as session:
514614
query = select(Tag)
515615
query = query.options(
516616
selectinload(Tag.subtags),
517617
selectinload(Tag.aliases),
518-
)
618+
).limit(tag_limit)
519619

520-
if search.tag:
620+
if name:
521621
query = query.where(
522622
or_(
523-
Tag.name.icontains(search.tag),
524-
Tag.shorthand.icontains(search.tag),
623+
Tag.name.icontains(name),
624+
Tag.shorthand.icontains(name),
525625
)
526626
)
527627

@@ -531,7 +631,7 @@ def search_tags(
531631

532632
logger.info(
533633
"searching tags",
534-
search=search,
634+
search=name,
535635
statement=str(query),
536636
results=len(res),
537637
)
@@ -694,7 +794,7 @@ def add_entry_field_type(
694794
*,
695795
field: ValueType | None = None,
696796
field_id: _FieldID | str | None = None,
697-
value: str | datetime | list[str] | None = None,
797+
value: str | datetime | list[int] | None = None,
698798
) -> bool:
699799
logger.info(
700800
"add_field_to_entry",
@@ -727,8 +827,11 @@ def add_entry_field_type(
727827

728828
if value:
729829
assert isinstance(value, list)
730-
for tag in value:
731-
field_model.tags.add(Tag(name=tag))
830+
with Session(self.engine) as session:
831+
for tag_id in list(set(value)):
832+
tag = session.scalar(select(Tag).where(Tag.id == tag_id))
833+
field_model.tags.add(tag)
834+
session.flush()
732835

733836
elif field.type == FieldTypeEnum.DATETIME:
734837
field_model = DatetimeField(
@@ -760,6 +863,28 @@ def add_entry_field_type(
760863
)
761864
return True
762865

866+
def tag_from_strings(self, strings: list[str] | str) -> list[int]:
    """Return Tag IDs for the given names, creating Tags that do not exist.

    Args:
        strings: A tag name or a list of tag names. Each name is matched
            exactly against ``Tag.name``.

    Returns:
        One Tag ID per input name, in input order.
    """
    # TODO: Port over tag searching with aliases fallbacks
    # and context clue ranking for string searches.
    tags: list[int] = []

    if isinstance(strings, str):
        strings = [strings]

    with Session(self.engine) as session:
        for string in strings:
            tag = session.scalar(select(Tag).where(Tag.name == string))
            if tag is None:
                # Session.add() returns None, so keep our own reference to
                # the new Tag and flush so the database assigns its ID.
                tag = Tag(name=string)
                session.add(tag)
                session.flush()
            tags.append(tag.id)

        session.commit()
    return tags
887+
763888
def add_tag(
764889
self,
765890
tag: Tag,
@@ -852,7 +977,7 @@ def save_library_backup_to_disk(self) -> Path:
852977
target_path = self.library_dir / TS_FOLDER_NAME / BACKUP_FOLDER_NAME / filename
853978

854979
shutil.copy2(
855-
self.library_dir / TS_FOLDER_NAME / self.FILENAME,
980+
self.library_dir / TS_FOLDER_NAME / self.SQL_FILENAME,
856981
target_path,
857982
)
858983

@@ -879,15 +1004,15 @@ def get_alias(self, tag_id: int, alias_id: int) -> TagAlias:
8791004

8801005
return alias
8811006

882-
def add_subtag(self, base_id: int, new_tag_id: int) -> bool:
883-
if base_id == new_tag_id:
1007+
def add_subtag(self, parent_id: int, child_id: int) -> bool:
1008+
if parent_id == child_id:
8841009
return False
8851010

8861011
# open session and save as parent tag
8871012
with Session(self.engine) as session:
8881013
subtag = TagSubtag(
889-
parent_id=base_id,
890-
child_id=new_tag_id,
1014+
parent_id=parent_id,
1015+
child_id=child_id,
8911016
)
8921017

8931018
try:
@@ -899,6 +1024,22 @@ def add_subtag(self, base_id: int, new_tag_id: int) -> bool:
8991024
logger.exception("IntegrityError")
9001025
return False
9011026

1027+
def add_alias(self, name: str, tag_id: int) -> bool:
    """Store a new alias for a tag.

    Args:
        name: The alias text.
        tag_id: ID of the tag the alias belongs to.

    Returns:
        True if the alias was committed, False if the commit raised an
        IntegrityError (the session is rolled back and the error logged).
    """
    with Session(self.engine) as session:
        new_alias = TagAlias(tag_id=tag_id, name=name)

        try:
            session.add(new_alias)
            session.commit()
        except IntegrityError:
            session.rollback()
            logger.exception("IntegrityError")
            return False
        return True
1042+
9021043
def remove_subtag(self, base_id: int, remove_tag_id: int) -> bool:
9031044
with Session(self.engine) as session:
9041045
p_id = base_id

0 commit comments

Comments
 (0)