Skip to content

Commit

Permalink
feat: add pgvector full_text_search (langgenius#7396)
Browse files Browse the repository at this point in the history
  • Loading branch information
jasonkang14 authored and JunXu01 committed Nov 9, 2024
1 parent 1d6f270 commit ebabb16
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 6 deletions.
23 changes: 21 additions & 2 deletions api/core/rag/datasource/vdb/pgvector/pgvector.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,27 @@ def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Doc
return docs

def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
    """Return the rows that best match ``query`` using PostgreSQL full-text search.

    Rows of ``self.table_name`` whose ``text`` column matches the query are
    ranked with ``ts_rank`` and returned best match first.

    :param query: free-form search text. It is parsed with ``plainto_tsquery``
        for both filtering and ranking, so whitespace, quotes and tsquery
        operator characters in the text do not need any manual escaping.
    :param kwargs: may contain ``top_k`` (default 5), the maximum number of
        rows to return.
    :return: one ``Document`` per matching row; the rank is stored in the
        row's metadata under the ``"score"`` key.
    """
    top_k = kwargs.get("top_k", 5)

    with self._get_cursor() as cur:
        cur.execute(
            # Only the table name is interpolated: identifiers cannot be bound
            # as query parameters. Every caller-supplied value (the query text
            # and the row limit) is passed as a bound parameter instead.
            f"""SELECT meta, text, ts_rank(to_tsvector(coalesce(text, '')), plainto_tsquery(%s)) AS score
            FROM {self.table_name}
            WHERE to_tsvector(text) @@ plainto_tsquery(%s)
            ORDER BY score DESC
            LIMIT %s""",
            # The same parser is used for ranking and filtering so both see
            # an identical tsquery; the raw text is passed unquoted.
            (query, query, top_k),
        )

        docs = []
        for metadata, text, score in cur:
            # Expose the rank to callers alongside the stored metadata.
            metadata["score"] = score
            docs.append(Document(page_content=text, metadata=metadata))

    return docs

def delete(self) -> None:
with self._get_cursor() as cur:
Expand Down
4 changes: 0 additions & 4 deletions api/tests/integration_tests/vdb/pgvector/test_pgvector.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ def __init__(self):
),
)

def search_by_full_text(self):
    """Check that a full-text search for the example text yields no hits."""
    # NOTE(review): an empty result is what a store without full-text
    # support returns; revisit this expectation if the store under test
    # implements full-text search.
    run_full_text_search = self.vector.search_by_full_text
    found: list[Document] = run_full_text_search(query=get_example_text())
    assert len(found) == 0


def test_pgvector(setup_mock_redis):
    """Build a ``PGVectorTest`` and run every test it defines.

    ``setup_mock_redis`` is not used in the body; presumably it is a pytest
    fixture requested for its side effects (confirm against conftest).
    """
    pgvector_suite = PGVectorTest()
    pgvector_suite.run_all_tests()

0 comments on commit ebabb16

Please sign in to comment.