Skip to content

Commit 29d227d

Browse files
Computerdores authored and
CyanVoxel committed
feat: implement query language (TagStudioDev#606)
* add files * fix: term was parsing ANDList instead of ORList * make mypy happy * ruff format * add missing todo * add more constraint types * add parent property to AST * add BaseVisitor class * make mypy happy * add __init__.py * Revert "make mypy happy" This reverts commit 926d0dd. * refactoring and fixes * rudimentary search field integration * fix: check for None properly * fix: Entries without Tags are now searchable * make mypy happy * Revert "fix: Entries without Tags are now searchable" This reverts commit 19b40af. * fix: changed joins to outerjoins and added missing outerjoin * use query lang instead of tag_id FilterState * add todos * fix: remove unnecessary line that broke search when searching for exact name * fix tag search * refactoring * fix: path now uses GLOB operator for proper GLOBs * refactoring: remove FilterState.id and implement Library.get_entry_full as replacement * fix: use default value notation instead of if None statement in __post_init__ * remove obsolete Search Mode UI and related code * ruff fixes * remove obsolete tests * fix: item_thumb didn't query entries correctly * fix: search_library now correctly returns the number of *unique* entries * make mypy happy * implement NOT * remove obsolete filename search * remove summary as it is not applicable anymore * finish refactoring of FilterState * implement special:untagged * fix: make mypy happy * Revert changes to search_tags in favor of changes from TagStudioDev#604 * fix: also port test changes * fix: remove unnecessary import * fix: remove unused dataclass * fix: AND now works correctly with tags * simplify structure of parsed AST * add performance logging * perf: Improve performance of search by reducing number of required joins from 4 to 1 * perf: double NOT is now optimized out of the AST * fix: bug where pages would show less than the configured number of entries * Revert "add performance logging" This reverts commit c3c7d75. 
* fix: tag_id search was broken * somewhat adapt the existing autocompletion to this PR * perf: Use Relational Division Queries to improve Query Execution Time * fix: raise Exception so as to not fail silently * fix: Parser bug broke parentheses * little bit of clean up * remove unnecessary comment * add library for testing search * feat: add basic tests * fix: and queries containing just one tag were broken * chore: remove debug code * feat: more tests * refactor: more consistent name for variable Co-authored-by: Travis Abendshien <46939827+CyanVoxel@users.noreply.github.com> * fix: ruff check complaint over double import --------- Co-authored-by: Travis Abendshien <46939827+CyanVoxel@users.noreply.github.com>
1 parent 7761fcf commit 29d227d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+831
-277
lines changed

tagstudio/src/core/library/alchemy/enums.py

Lines changed: 39 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import enum
2-
from dataclasses import dataclass
2+
from dataclasses import dataclass, replace
33
from pathlib import Path
44

5+
from src.core.query_lang import AST as Query # noqa: N811
6+
from src.core.query_lang import Constraint, ConstraintType, Parser
7+
58

69
class TagColor(enum.IntEnum):
710
DEFAULT = 1
@@ -50,13 +53,6 @@ def get_color_from_str(color_name: str) -> "TagColor":
5053
return TagColor.DEFAULT
5154

5255

53-
class SearchMode(enum.IntEnum):
54-
"""Operational modes for item searching."""
55-
56-
AND = 0
57-
OR = 1
58-
59-
6056
class ItemType(enum.Enum):
6157
ENTRY = 0
6258
COLLATION = 1
@@ -68,71 +64,12 @@ class FilterState:
6864
"""Represent a state of the Library grid view."""
6965

7066
# these should remain
71-
page_index: int | None = None
72-
page_size: int | None = None
73-
search_mode: SearchMode = SearchMode.AND # TODO - actually implement this
67+
page_index: int | None = 0
68+
page_size: int | None = 500
7469

7570
# these should be erased on update
76-
# tag name
77-
tag: str | None = None
78-
# tag ID
79-
tag_id: int | None = None
80-
81-
# entry id
82-
id: int | None = None
83-
# whole path
84-
path: Path | str | None = None
85-
# file name
86-
name: str | None = None
87-
# file type
88-
filetype: str | None = None
89-
mediatype: str | None = None
90-
91-
# a generic query to be parsed
92-
query: str | None = None
93-
94-
def __post_init__(self):
95-
# strip values automatically
96-
if query := (self.query and self.query.strip()):
97-
# parse the value
98-
if ":" in query:
99-
kind, _, value = query.partition(":")
100-
value = value.replace('"', "")
101-
else:
102-
# default to tag search
103-
kind, value = "tag", query
104-
105-
if kind == "tag_id":
106-
self.tag_id = int(value)
107-
elif kind == "tag":
108-
self.tag = value
109-
elif kind == "path":
110-
self.path = value
111-
elif kind == "name":
112-
self.name = value
113-
elif kind == "id":
114-
self.id = int(self.id) if str(self.id).isnumeric() else self.id
115-
elif kind == "filetype":
116-
self.filetype = value
117-
elif kind == "mediatype":
118-
self.mediatype = value
119-
120-
else:
121-
self.tag = self.tag and self.tag.strip()
122-
self.tag_id = int(self.tag_id) if str(self.tag_id).isnumeric() else self.tag_id
123-
self.path = self.path and str(self.path).strip()
124-
self.name = self.name and self.name.strip()
125-
self.id = int(self.id) if str(self.id).isnumeric() else self.id
126-
127-
if self.page_index is None:
128-
self.page_index = 0
129-
if self.page_size is None:
130-
self.page_size = 500
131-
132-
@property
133-
def summary(self):
134-
"""Show query summary."""
135-
return self.query or self.tag or self.name or self.tag_id or self.path or self.id
71+
# Abstract Syntax Tree Of the current Search Query
72+
ast: Query = None
13673

13774
@property
13875
def limit(self):
@@ -142,6 +79,37 @@ def limit(self):
14279
def offset(self):
14380
return self.page_size * self.page_index
14481

82+
@classmethod
83+
def show_all(cls) -> "FilterState":
84+
return FilterState()
85+
86+
@classmethod
87+
def from_search_query(cls, search_query: str) -> "FilterState":
88+
return cls(ast=Parser(search_query).parse())
89+
90+
@classmethod
91+
def from_tag_id(cls, tag_id: int | str) -> "FilterState":
92+
return cls(ast=Constraint(ConstraintType.TagID, str(tag_id), []))
93+
94+
@classmethod
95+
def from_path(cls, path: Path | str) -> "FilterState":
96+
return cls(ast=Constraint(ConstraintType.Path, str(path).strip(), []))
97+
98+
@classmethod
99+
def from_mediatype(cls, mediatype: str) -> "FilterState":
100+
return cls(ast=Constraint(ConstraintType.MediaType, mediatype, []))
101+
102+
@classmethod
103+
def from_filetype(cls, filetype: str) -> "FilterState":
104+
return cls(ast=Constraint(ConstraintType.FileType, filetype, []))
105+
106+
@classmethod
107+
def from_tag_name(cls, tag_name: str) -> "FilterState":
108+
return cls(ast=Constraint(ConstraintType.Tag, tag_name, []))
109+
110+
def with_page_size(self, page_size: int) -> "FilterState":
111+
return replace(self, page_size=page_size)
112+
145113

146114
class FieldTypeEnum(enum.Enum):
147115
TEXT_LINE = "Text Line"

tagstudio/src/core/library/alchemy/library.py

Lines changed: 34 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
from sqlalchemy.exc import IntegrityError
2929
from sqlalchemy.orm import (
3030
Session,
31-
aliased,
3231
contains_eager,
3332
make_transient,
3433
selectinload,
@@ -42,7 +41,6 @@
4241
TS_FOLDER_NAME,
4342
)
4443
from ...enums import LibraryPrefs
45-
from ...media_types import MediaCategories
4644
from .db import make_tables
4745
from .enums import FieldTypeEnum, FilterState, TagColor
4846
from .fields import (
@@ -54,6 +52,7 @@
5452
)
5553
from .joins import TagField, TagSubtag
5654
from .models import Entry, Folder, Preferences, Tag, TagAlias, ValueType
55+
from .visitors import SQLBoolExpressionBuilder
5756

5857
logger = structlog.get_logger(__name__)
5958

@@ -402,6 +401,29 @@ def get_entry(self, entry_id: int) -> Entry | None:
402401
make_transient(entry)
403402
return entry
404403

404+
def get_entry_full(self, entry_id: int) -> Entry | None:
405+
"""Load entry and join with all joins and all tags."""
406+
with Session(self.engine) as session:
407+
statement = select(Entry).where(Entry.id == entry_id)
408+
statement = (
409+
statement.outerjoin(Entry.text_fields)
410+
.outerjoin(Entry.datetime_fields)
411+
.outerjoin(Entry.tag_box_fields)
412+
)
413+
statement = statement.options(
414+
selectinload(Entry.text_fields),
415+
selectinload(Entry.datetime_fields),
416+
selectinload(Entry.tag_box_fields)
417+
.joinedload(TagBoxField.tags)
418+
.options(selectinload(Tag.aliases), selectinload(Tag.subtags)),
419+
)
420+
entry = session.scalar(statement)
421+
if not entry:
422+
return None
423+
session.expunge(entry)
424+
make_transient(entry)
425+
return entry
426+
405427
@property
406428
def entries_count(self) -> int:
407429
with Session(self.engine) as session:
@@ -518,63 +540,18 @@ def search_library(
518540
with Session(self.engine, expire_on_commit=False) as session:
519541
statement = select(Entry)
520542

521-
if search.tag:
522-
SubtagAlias = aliased(Tag) # noqa: N806
523-
statement = (
524-
statement.join(Entry.tag_box_fields)
525-
.join(TagBoxField.tags)
526-
.outerjoin(Tag.aliases)
527-
.outerjoin(SubtagAlias, Tag.subtags)
528-
.where(
529-
or_(
530-
Tag.name.ilike(search.tag),
531-
Tag.shorthand.ilike(search.tag),
532-
TagAlias.name.ilike(search.tag),
533-
SubtagAlias.name.ilike(search.tag),
534-
)
535-
)
536-
)
537-
elif search.tag_id:
538-
statement = (
539-
statement.join(Entry.tag_box_fields)
540-
.join(TagBoxField.tags)
541-
.where(Tag.id == search.tag_id)
542-
)
543-
544-
elif search.id:
545-
statement = statement.where(Entry.id == search.id)
546-
elif search.name:
547-
statement = select(Entry).where(
548-
and_(
549-
Entry.path.ilike(f"%{search.name}%"),
550-
# dont match directory name (ie. has following slash)
551-
~Entry.path.ilike(f"%{search.name}%/%"),
552-
)
553-
)
554-
elif search.path:
555-
search_str = str(search.path).replace("*", "%")
556-
statement = statement.where(Entry.path.ilike(search_str))
557-
elif search.filetype:
558-
statement = statement.where(Entry.suffix.ilike(f"{search.filetype}"))
559-
elif search.mediatype:
560-
extensions: set[str] = set[str]()
561-
for media_cat in MediaCategories.ALL_CATEGORIES:
562-
if search.mediatype == media_cat.name:
563-
extensions = extensions | media_cat.extensions
564-
break
565-
# just need to map it to search db - suffixes do not have '.'
566-
statement = statement.where(
567-
Entry.suffix.in_(map(lambda x: x.replace(".", ""), extensions))
543+
if search.ast:
544+
statement = statement.outerjoin(Entry.tag_box_fields).where(
545+
SQLBoolExpressionBuilder(self).visit(search.ast)
568546
)
569547

570548
extensions = self.prefs(LibraryPrefs.EXTENSION_LIST)
571549
is_exclude_list = self.prefs(LibraryPrefs.IS_EXCLUDE_LIST)
572550

573-
if not search.id: # if `id` is set, we don't need to filter by extensions
574-
if extensions and is_exclude_list:
575-
statement = statement.where(Entry.suffix.notin_(extensions))
576-
elif extensions:
577-
statement = statement.where(Entry.suffix.in_(extensions))
551+
if extensions and is_exclude_list:
552+
statement = statement.where(Entry.suffix.notin_(extensions))
553+
elif extensions:
554+
statement = statement.where(Entry.suffix.in_(extensions))
578555

579556
statement = statement.options(
580557
selectinload(Entry.text_fields),
@@ -584,6 +561,8 @@ def search_library(
584561
.options(selectinload(Tag.aliases), selectinload(Tag.subtags)),
585562
)
586563

564+
statement = statement.distinct(Entry.id)
565+
587566
query_count = select(func.count()).select_from(statement.alias("entries"))
588567
count_all: int = session.execute(query_count).scalar()
589568

@@ -597,7 +576,7 @@ def search_library(
597576

598577
res = SearchResult(
599578
total_count=count_all,
600-
items=list(session.scalars(statement).unique()),
579+
items=list(session.scalars(statement)),
601580
)
602581

603582
session.expunge_all()

0 commit comments

Comments
 (0)