Skip to content

feat: Add the PGVectorStore class #168

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
a21a8e2
feat: Add the PGVectorStore class
dishaprakash Mar 19, 2025
926f417
Linter and format fix
dishaprakash Mar 19, 2025
a34ddbe
update poetry lock
dishaprakash Mar 19, 2025
bdd2bf6
minor variable name change
dishaprakash Mar 19, 2025
239b1c3
Fix import test
dishaprakash Mar 19, 2025
544cade
enabled socket in one test file
dishaprakash Mar 19, 2025
03dcac1
enabled socket in all test files
dishaprakash Mar 19, 2025
7b4fa7f
Debug tests being skipped
dishaprakash Mar 19, 2025
1d42314
Debug tests being skipped
dishaprakash Mar 19, 2025
dc9a5b8
Debug tests being skipped
dishaprakash Mar 19, 2025
4c3f93f
Debug tests being failed
dishaprakash Mar 19, 2025
b3a12b7
revert debug lines
dishaprakash Mar 19, 2025
8b30833
Remove IVFIndex
dishaprakash Mar 19, 2025
1496033
Minor change
dishaprakash Mar 19, 2025
cbd0889
Review changes
dishaprakash Apr 1, 2025
b436df3
Refactor vectorstore packaging in import
dishaprakash Apr 1, 2025
eb6954d
Change test table names
dishaprakash Apr 1, 2025
3e52c56
Linter fix
dishaprakash Apr 1, 2025
a24fe73
Minor fix
dishaprakash Apr 1, 2025
c74858e
Fix test
dishaprakash Apr 1, 2025
e52e609
Fix tests
dishaprakash Apr 1, 2025
1f6a70e
Remove chat message history format
dishaprakash Apr 1, 2025
8029731
Fix test
dishaprakash Apr 1, 2025
cf58c2a
Fix indexing tests
dishaprakash Apr 1, 2025
b9526c6
Make escape sql string function private
dishaprakash Apr 1, 2025
1d6563a
Rename namespaces
dishaprakash Apr 2, 2025
c9ad8f3
Enable support for TypedDict along with Column
dishaprakash Apr 2, 2025
a913b5a
Fix import test
dishaprakash Apr 2, 2025
9e539e0
Linter fix
dishaprakash Apr 2, 2025
1daac17
Linter fix
dishaprakash Apr 2, 2025
5062185
Add validation and quotes for indexes
dishaprakash Apr 3, 2025
fe62c35
Merge branch 'pg-vectorstore' into upstream-langchain
averikitsch Apr 4, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Linter and format fix
  • Loading branch information
dishaprakash committed Mar 19, 2025
commit 926f417d2f940b392728c9b3b9193cf484df03a3
142 changes: 101 additions & 41 deletions examples/vectorstore.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
"from langchain_core.documents import Document\n",
"\n",
"# See docker command above to launch a postgres instance with pgvector enabled.\n",
"connection = \"postgresql+psycopg://langchain:langchain@localhost:6024/langchain\" \n",
"connection = \"postgresql+psycopg://langchain:langchain@localhost:6024/langchain\"\n",
"collection_name = \"my_docs\"\n",
"embeddings = CohereEmbeddings()\n",
"\n",
Expand Down Expand Up @@ -126,17 +126,47 @@
"outputs": [],
"source": [
"docs = [\n",
" Document(page_content='there are cats in the pond', metadata={\"id\": 1, \"location\": \"pond\", \"topic\": \"animals\"}),\n",
" Document(page_content='ducks are also found in the pond', metadata={\"id\": 2, \"location\": \"pond\", \"topic\": \"animals\"}),\n",
" Document(page_content='fresh apples are available at the market', metadata={\"id\": 3, \"location\": \"market\", \"topic\": \"food\"}),\n",
" Document(page_content='the market also sells fresh oranges', metadata={\"id\": 4, \"location\": \"market\", \"topic\": \"food\"}),\n",
" Document(page_content='the new art exhibit is fascinating', metadata={\"id\": 5, \"location\": \"museum\", \"topic\": \"art\"}),\n",
" Document(page_content='a sculpture exhibit is also at the museum', metadata={\"id\": 6, \"location\": \"museum\", \"topic\": \"art\"}),\n",
" Document(page_content='a new coffee shop opened on Main Street', metadata={\"id\": 7, \"location\": \"Main Street\", \"topic\": \"food\"}),\n",
" Document(page_content='the book club meets at the library', metadata={\"id\": 8, \"location\": \"library\", \"topic\": \"reading\"}),\n",
" Document(page_content='the library hosts a weekly story time for kids', metadata={\"id\": 9, \"location\": \"library\", \"topic\": \"reading\"}),\n",
" Document(page_content='a cooking class for beginners is offered at the community center', metadata={\"id\": 10, \"location\": \"community center\", \"topic\": \"classes\"})\n",
"]\n"
" Document(\n",
" page_content=\"there are cats in the pond\",\n",
" metadata={\"id\": 1, \"location\": \"pond\", \"topic\": \"animals\"},\n",
" ),\n",
" Document(\n",
" page_content=\"ducks are also found in the pond\",\n",
" metadata={\"id\": 2, \"location\": \"pond\", \"topic\": \"animals\"},\n",
" ),\n",
" Document(\n",
" page_content=\"fresh apples are available at the market\",\n",
" metadata={\"id\": 3, \"location\": \"market\", \"topic\": \"food\"},\n",
" ),\n",
" Document(\n",
" page_content=\"the market also sells fresh oranges\",\n",
" metadata={\"id\": 4, \"location\": \"market\", \"topic\": \"food\"},\n",
" ),\n",
" Document(\n",
" page_content=\"the new art exhibit is fascinating\",\n",
" metadata={\"id\": 5, \"location\": \"museum\", \"topic\": \"art\"},\n",
" ),\n",
" Document(\n",
" page_content=\"a sculpture exhibit is also at the museum\",\n",
" metadata={\"id\": 6, \"location\": \"museum\", \"topic\": \"art\"},\n",
" ),\n",
" Document(\n",
" page_content=\"a new coffee shop opened on Main Street\",\n",
" metadata={\"id\": 7, \"location\": \"Main Street\", \"topic\": \"food\"},\n",
" ),\n",
" Document(\n",
" page_content=\"the book club meets at the library\",\n",
" metadata={\"id\": 8, \"location\": \"library\", \"topic\": \"reading\"},\n",
" ),\n",
" Document(\n",
" page_content=\"the library hosts a weekly story time for kids\",\n",
" metadata={\"id\": 9, \"location\": \"library\", \"topic\": \"reading\"},\n",
" ),\n",
" Document(\n",
" page_content=\"a cooking class for beginners is offered at the community center\",\n",
" metadata={\"id\": 10, \"location\": \"community center\", \"topic\": \"classes\"},\n",
" ),\n",
"]"
]
},
{
Expand All @@ -159,7 +189,7 @@
}
],
"source": [
"vectorstore.add_documents(docs, ids=[doc.metadata['id'] for doc in docs])"
"vectorstore.add_documents(docs, ids=[doc.metadata[\"id\"] for doc in docs])"
]
},
{
Expand Down Expand Up @@ -191,7 +221,7 @@
}
],
"source": [
"vectorstore.similarity_search('kitty', k=10)"
"vectorstore.similarity_search(\"kitty\", k=10)"
]
},
{
Expand All @@ -212,17 +242,47 @@
"outputs": [],
"source": [
"docs = [\n",
" Document(page_content='there are cats in the pond', metadata={\"id\": 1, \"location\": \"pond\", \"topic\": \"animals\"}),\n",
" Document(page_content='ducks are also found in the pond', metadata={\"id\": 2, \"location\": \"pond\", \"topic\": \"animals\"}),\n",
" Document(page_content='fresh apples are available at the market', metadata={\"id\": 3, \"location\": \"market\", \"topic\": \"food\"}),\n",
" Document(page_content='the market also sells fresh oranges', metadata={\"id\": 4, \"location\": \"market\", \"topic\": \"food\"}),\n",
" Document(page_content='the new art exhibit is fascinating', metadata={\"id\": 5, \"location\": \"museum\", \"topic\": \"art\"}),\n",
" Document(page_content='a sculpture exhibit is also at the museum', metadata={\"id\": 6, \"location\": \"museum\", \"topic\": \"art\"}),\n",
" Document(page_content='a new coffee shop opened on Main Street', metadata={\"id\": 7, \"location\": \"Main Street\", \"topic\": \"food\"}),\n",
" Document(page_content='the book club meets at the library', metadata={\"id\": 8, \"location\": \"library\", \"topic\": \"reading\"}),\n",
" Document(page_content='the library hosts a weekly story time for kids', metadata={\"id\": 9, \"location\": \"library\", \"topic\": \"reading\"}),\n",
" Document(page_content='a cooking class for beginners is offered at the community center', metadata={\"id\": 10, \"location\": \"community center\", \"topic\": \"classes\"})\n",
"]\n"
" Document(\n",
" page_content=\"there are cats in the pond\",\n",
" metadata={\"id\": 1, \"location\": \"pond\", \"topic\": \"animals\"},\n",
" ),\n",
" Document(\n",
" page_content=\"ducks are also found in the pond\",\n",
" metadata={\"id\": 2, \"location\": \"pond\", \"topic\": \"animals\"},\n",
" ),\n",
" Document(\n",
" page_content=\"fresh apples are available at the market\",\n",
" metadata={\"id\": 3, \"location\": \"market\", \"topic\": \"food\"},\n",
" ),\n",
" Document(\n",
" page_content=\"the market also sells fresh oranges\",\n",
" metadata={\"id\": 4, \"location\": \"market\", \"topic\": \"food\"},\n",
" ),\n",
" Document(\n",
" page_content=\"the new art exhibit is fascinating\",\n",
" metadata={\"id\": 5, \"location\": \"museum\", \"topic\": \"art\"},\n",
" ),\n",
" Document(\n",
" page_content=\"a sculpture exhibit is also at the museum\",\n",
" metadata={\"id\": 6, \"location\": \"museum\", \"topic\": \"art\"},\n",
" ),\n",
" Document(\n",
" page_content=\"a new coffee shop opened on Main Street\",\n",
" metadata={\"id\": 7, \"location\": \"Main Street\", \"topic\": \"food\"},\n",
" ),\n",
" Document(\n",
" page_content=\"the book club meets at the library\",\n",
" metadata={\"id\": 8, \"location\": \"library\", \"topic\": \"reading\"},\n",
" ),\n",
" Document(\n",
" page_content=\"the library hosts a weekly story time for kids\",\n",
" metadata={\"id\": 9, \"location\": \"library\", \"topic\": \"reading\"},\n",
" ),\n",
" Document(\n",
" page_content=\"a cooking class for beginners is offered at the community center\",\n",
" metadata={\"id\": 10, \"location\": \"community center\", \"topic\": \"classes\"},\n",
" ),\n",
"]"
]
},
{
Expand Down Expand Up @@ -275,9 +335,7 @@
}
],
"source": [
"vectorstore.similarity_search('kitty', k=10, filter={\n",
" 'id': {'$in': [1, 5, 2, 9]}\n",
"})"
"vectorstore.similarity_search(\"kitty\", k=10, filter={\"id\": {\"$in\": [1, 5, 2, 9]}})"
]
},
{
Expand Down Expand Up @@ -309,10 +367,11 @@
}
],
"source": [
"vectorstore.similarity_search('ducks', k=10, filter={\n",
" 'id': {'$in': [1, 5, 2, 9]},\n",
" 'location': {'$in': [\"pond\", \"market\"]}\n",
"})"
"vectorstore.similarity_search(\n",
" \"ducks\",\n",
" k=10,\n",
" filter={\"id\": {\"$in\": [1, 5, 2, 9]}, \"location\": {\"$in\": [\"pond\", \"market\"]}},\n",
")"
]
},
{
Expand All @@ -336,12 +395,15 @@
}
],
"source": [
"vectorstore.similarity_search('ducks', k=10, filter={\n",
" '$and': [\n",
" {'id': {'$in': [1, 5, 2, 9]}},\n",
" {'location': {'$in': [\"pond\", \"market\"]}},\n",
" ]\n",
"}\n",
"vectorstore.similarity_search(\n",
" \"ducks\",\n",
" k=10,\n",
" filter={\n",
" \"$and\": [\n",
" {\"id\": {\"$in\": [1, 5, 2, 9]}},\n",
" {\"location\": {\"$in\": [\"pond\", \"market\"]}},\n",
" ]\n",
" },\n",
")"
]
},
Expand Down Expand Up @@ -372,9 +434,7 @@
}
],
"source": [
"vectorstore.similarity_search('bird', k=10, filter={\n",
" 'location': { \"$ne\": 'pond'}\n",
"})"
"vectorstore.similarity_search(\"bird\", k=10, filter={\"location\": {\"$ne\": \"pond\"}})"
]
}
],
Expand Down
3 changes: 1 addition & 2 deletions langchain_postgres/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from importlib import metadata

from langchain_postgres.chat_message_histories import \
PostgresChatMessageHistory
from langchain_postgres.chat_message_histories import PostgresChatMessageHistory
from langchain_postgres.engine import Column, PGEngine
from langchain_postgres.translator import PGVectorTranslator
from langchain_postgres.vectorstore import PGVectorStore
Expand Down
13 changes: 9 additions & 4 deletions langchain_postgres/async_vectorstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,14 @@
from sqlalchemy.ext.asyncio import AsyncEngine

from .engine import PGEngine
from .indexes import (DEFAULT_DISTANCE_STRATEGY, DEFAULT_INDEX_NAME_SUFFIX,
BaseIndex, DistanceStrategy, ExactNearestNeighbor,
QueryOptions)
from .indexes import (
DEFAULT_DISTANCE_STRATEGY,
DEFAULT_INDEX_NAME_SUFFIX,
BaseIndex,
DistanceStrategy,
ExactNearestNeighbor,
QueryOptions,
)

COMPARISONS_TO_NATIVE = {
"$eq": "=",
Expand Down Expand Up @@ -775,7 +780,7 @@ async def aapply_vector_index(
if index.name == None:
index.name = self.table_name + DEFAULT_INDEX_NAME_SUFFIX
name = index.name
stmt = f"CREATE INDEX {'CONCURRENTLY' if concurrently else ''} {name} ON \"{self.schema_name}\".\"{self.table_name}\" USING {index.index_type} ({self.embedding_column} {function}) {params} {filter};"
stmt = f'CREATE INDEX {"CONCURRENTLY" if concurrently else ""} {name} ON "{self.schema_name}"."{self.table_name}" USING {index.index_type} ({self.embedding_column} {function}) {params} {filter};'
if concurrently:
async with self.engine.connect() as conn:
await conn.execute(text("COMMIT"))
Expand Down
1 change: 1 addition & 0 deletions langchain_postgres/chat_message_histories.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

This client provides support for both sync and async via psycopg 3.
"""

from __future__ import annotations

import json
Expand Down
8 changes: 6 additions & 2 deletions langchain_postgres/vectorstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@

from .async_vectorstore import AsyncPGVectorStore
from .engine import PGEngine
from .indexes import (DEFAULT_DISTANCE_STRATEGY, BaseIndex, DistanceStrategy,
QueryOptions)
from .indexes import (
DEFAULT_DISTANCE_STRATEGY,
BaseIndex,
DistanceStrategy,
QueryOptions,
)


class PGVectorStore(VectorStore):
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ psycopg-pool = "^3.2.1"
sqlalchemy = "^2"
pgvector = "<0.4"
numpy = ">=1.21"
asyncpg = ">=0.30.0"

[tool.poetry.group.docs.dependencies]

Expand Down
1 change: 1 addition & 0 deletions tests/unit_tests/fake_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Copied from community."""

from typing import List

from langchain_core.embeddings import Embeddings
Expand Down
1 change: 1 addition & 0 deletions tests/unit_tests/fixtures/filtering_test_cases.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module needs to move to a standalone package."""

from langchain_core.documents import Document

metadatas = [
Expand Down
5 changes: 2 additions & 3 deletions tests/unit_tests/test_async_pg_vectorstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@

from langchain_postgres import Column, PGEngine
from langchain_postgres.async_vectorstore import AsyncPGVectorStore
from tests.utils import \
VECTORSTORE_CONNECTION_STRING_ASYNCPG as CONNECTION_STRING
from tests.utils import VECTORSTORE_CONNECTION_STRING_ASYNCPG as CONNECTION_STRING

DEFAULT_TABLE = "test_table" + str(uuid.uuid4())
DEFAULT_TABLE_SYNC = "test_table_sync" + str(uuid.uuid4())
Expand Down Expand Up @@ -43,7 +42,7 @@ async def afetch(engine: PGEngine, query: str) -> Sequence[RowMapping]:
return result_fetch


@pytest.mark.asyncio(loop_scope="class")
@pytest.mark.asyncio(scope="class")
class TestVectorStore:
@pytest_asyncio.fixture(scope="class")
async def engine(self) -> AsyncIterator[PGEngine]:
Expand Down
3 changes: 1 addition & 2 deletions tests/unit_tests/test_async_pg_vectorstore_from_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@

from langchain_postgres import Column, PGEngine
from langchain_postgres.async_vectorstore import AsyncPGVectorStore
from tests.utils import \
VECTORSTORE_CONNECTION_STRING_ASYNCPG as CONNECTION_STRING
from tests.utils import VECTORSTORE_CONNECTION_STRING_ASYNCPG as CONNECTION_STRING

DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_")
DEFAULT_TABLE_SYNC = "test_table_sync" + str(uuid.uuid4()).replace("-", "_")
Expand Down
14 changes: 8 additions & 6 deletions tests/unit_tests/test_async_pg_vectorstore_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@

from langchain_postgres import PGEngine
from langchain_postgres.async_vectorstore import AsyncPGVectorStore
from langchain_postgres.indexes import (DEFAULT_INDEX_NAME_SUFFIX,
DistanceStrategy, HNSWIndex,
IVFFlatIndex)
from tests.utils import \
VECTORSTORE_CONNECTION_STRING_ASYNCPG as CONNECTION_STRING
from langchain_postgres.indexes import (
DEFAULT_INDEX_NAME_SUFFIX,
DistanceStrategy,
HNSWIndex,
IVFFlatIndex,
)
from tests.utils import VECTORSTORE_CONNECTION_STRING_ASYNCPG as CONNECTION_STRING

DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_")
DEFAULT_INDEX_NAME = DEFAULT_TABLE + DEFAULT_INDEX_NAME_SUFFIX
Expand Down Expand Up @@ -45,7 +47,7 @@ async def aexecute(engine: PGEngine, query: str) -> None:
await conn.commit()


@pytest.mark.asyncio(loop_scope="class")
@pytest.mark.asyncio(scope="class")
class TestIndex:
@pytest_asyncio.fixture(scope="class")
async def engine(self) -> AsyncIterator[PGEngine]:
Expand Down
12 changes: 7 additions & 5 deletions tests/unit_tests/test_async_pg_vectorstore_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
from langchain_postgres.async_vectorstore import AsyncPGVectorStore
from langchain_postgres.indexes import DistanceStrategy, HNSWQueryOptions
from tests.unit_tests.fixtures.metadata_filtering_data import (
FILTERING_TEST_CASES, METADATAS)
from tests.utils import \
VECTORSTORE_CONNECTION_STRING_ASYNCPG as CONNECTION_STRING
FILTERING_TEST_CASES,
METADATAS,
)
from tests.utils import VECTORSTORE_CONNECTION_STRING_ASYNCPG as CONNECTION_STRING

DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_")
CUSTOM_TABLE = "test_table_custom" + str(uuid.uuid4()).replace("-", "_")
Expand Down Expand Up @@ -58,7 +59,7 @@ async def aexecute(
await conn.commit()


@pytest.mark.asyncio(loop_scope="class")
@pytest.mark.asyncio(scope="class")
class TestVectorStoreSearch:
@pytest_asyncio.fixture(scope="class")
async def engine(self) -> AsyncIterator[PGEngine]:
Expand Down Expand Up @@ -212,7 +213,8 @@ async def test_similarity_search_with_relevance_scores_threshold_euclidean(

score_threshold = {"score_threshold": 0.9}
results = await vs.asimilarity_search_with_relevance_scores(
"foo", **score_threshold # type: ignore
"foo",
**score_threshold, # type: ignore
)
assert len(results) == 1
assert results[0][0] == Document(page_content="foo", id=ids[0])
Expand Down
5 changes: 1 addition & 4 deletions tests/unit_tests/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
from sqlalchemy.pool import NullPool

from langchain_postgres import Column, PGEngine
from tests.utils import \
VECTORSTORE_CONNECTION_STRING_ASYNCPG as CONNECTION_STRING
from tests.utils import VECTORSTORE_CONNECTION_STRING_ASYNCPG as CONNECTION_STRING

DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_")
CUSTOM_TABLE = "test_table_custom" + str(uuid.uuid4()).replace("-", "_")
Expand All @@ -26,7 +25,6 @@
VECTOR_SIZE = 768

embeddings_service = DeterministicFakeEmbedding(size=VECTOR_SIZE)
host = os.environ["IP_ADDRESS"]


def get_env_var(key: str, desc: str) -> str:
Expand Down Expand Up @@ -135,7 +133,6 @@ async def test_init_table_with_int_id(self, engine: PGEngine) -> None:
assert row in expected

async def test_from_engine(self) -> None:

engine = create_async_engine(
CONNECTION_STRING,
)
Expand Down
Loading
Loading