Skip to content

Commit

Permalink
feature: in similarity_search, include keys in returned Documents whe…
Browse files Browse the repository at this point in the history
…n return_all=True
  • Loading branch information
bsbodden committed Dec 10, 2024
1 parent 2bdf379 commit c77be6d
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 2 deletions.
13 changes: 11 additions & 2 deletions libs/redis/langchain_redis/vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -834,14 +834,16 @@ def similarity_search_by_vector(

return [
Document(
id=result[self.config.id_field],
page_content=doc[self.config.content_field],
metadata={
k: v
for k, v in doc.items()
if k != self.config.content_field
},
)
for doc in full_docs
for doc, result in zip(full_docs, results)
if doc is not None # Handle potential missing documents
]
else:
# Fetch full JSON data for each document
Expand All @@ -855,14 +857,15 @@ def similarity_search_by_vector(

return [
Document(
id=result[self.config.id_field],
page_content=doc[self.config.content_field],
metadata={
k: v
for k, v in doc.items()
if k != self.config.content_field
},
)
for doc in full_docs
for doc, result in zip(full_docs, results)
if doc is not None # Handle potential missing documents
]

Expand Down Expand Up @@ -1034,6 +1037,7 @@ def similarity_search_with_score_by_vector(
docs_with_scores = [
(
Document(
id=result[self.config.id_field],
page_content=doc[self.config.content_field],
metadata={
k: v
Expand All @@ -1048,6 +1052,7 @@ def similarity_search_with_score_by_vector(
),
)
for doc, result in zip(full_docs, results)
if doc is not None
]
else:
docs_with_scores = [
Expand All @@ -1058,6 +1063,7 @@ def similarity_search_with_score_by_vector(
],
(
Document(
id=result[self.config.id_field],
page_content=doc[self.config.content_field],
metadata={
k: v
Expand All @@ -1069,6 +1075,7 @@ def similarity_search_with_score_by_vector(
),
)
for doc, result in zip(full_docs, results)
if doc is not None
]
else:
# Fetch full JSON data for each document
Expand All @@ -1079,6 +1086,7 @@ def similarity_search_with_score_by_vector(
docs_with_scores = [
(
Document(
id=result[self.config.id_field],
page_content=doc[self.config.content_field],
metadata={
k: v
Expand All @@ -1090,6 +1098,7 @@ def similarity_search_with_score_by_vector(
doc.get(self.config.embedding_field),
)
for doc, result in zip(full_docs, results)
if doc is not None
]
else:
docs_with_scores = [
Expand Down
43 changes: 43 additions & 0 deletions libs/redis/tests/integration_tests/test_vectorstores_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,49 @@ def test_custom_keys_from_docs(texts: List[str], redis_url: str) -> None:
)
# test all keys are stored
assert client.hget(f"{vector_store.key_prefix}:test_key_2", "text")
# test all keys in are the same as the keys_out
assert [f"tst8:{key}" for key in keys_in] == keys_out
# Clean up
vector_store.index.delete(drop=True)


def test_similarity_search_returns_keys(redis_url: str) -> None:
    """Check that a ``return_all=True`` search returns Documents carrying ids.

    Three documents are stored with hash storage under explicit keys and the
    ``wids`` key prefix. A vector search with ``return_all=True`` must return
    Documents whose ``page_content`` is one of the stored texts and whose
    ``id`` is one of the prefix-qualified keys.

    Args:
        redis_url: Connection URL of the Redis instance used by the test.
    """
    key_prefix = "wids"
    ids = ["test_key_1", "test_key_2", "test_key_3"]
    # The test expects returned ids in the "<key_prefix>:<id>" form.
    keys = [f"{key_prefix}:{doc_id}" for doc_id in ids]
    texts = [
        "The quick brown fox jumps over the lazy dog",
        "The lazy dog is jumped over by the quick brown fox",
        "The fox is quick and brown, and jumps over dogs",
    ]
    docs = [Document(page_content=t) for t in texts]

    # One embeddings instance serves both indexing and the query embedding.
    embeddings = OpenAIEmbeddings()

    index_name = f"test_index_{str(ULID())}"
    result = RedisVectorStore.from_documents(
        docs,
        embeddings,
        index_name=index_name,
        key_prefix=key_prefix,
        keys=ids,
        return_keys=True,
        redis_url=redis_url,
        storage_type="hash",
    )
    # With return_keys=True the call yields a (store, keys) pair.
    vector_store, _ = cast(Tuple[RedisVectorStore, List[str]], result)

    try:
        # Perform similarity search with return_all=True
        query_embedding = embeddings.embed_query("quick fox")
        results_with_return_all = vector_store.similarity_search_by_vector(
            query_embedding, k=2, return_all=True
        )

        assert len(results_with_return_all) == 2
        for doc in results_with_return_all:
            assert doc.page_content in texts
            assert doc.id in keys
    finally:
        # Clean up even when an assertion fails, so the index is not left
        # behind in Redis for later test runs.
        vector_store.index.delete(drop=True)

Expand Down
43 changes: 43 additions & 0 deletions libs/redis/tests/integration_tests/test_vectorstores_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,49 @@ def test_custom_keys_from_docs(texts: List[str], redis_url: str) -> None:
assert client.json().get(f"{vector_store.key_prefix}:test_key_1", "a") == "b"
# test all keys are stored
assert client.json().get(f"{vector_store.key_prefix}:test_key_2", "text")
# test all keys in are the same as the keys_out
assert [f"tst8:{key}" for key in keys_in] == keys_out
# Clean up
vector_store.index.delete(drop=True)


def test_similarity_search_returns_keys(redis_url: str) -> None:
    """Check that a ``return_all=True`` search returns Documents carrying ids.

    Three documents are stored with JSON storage under explicit keys and the
    ``wids`` key prefix. A vector search with ``return_all=True`` must return
    Documents whose ``page_content`` is one of the stored texts and whose
    ``id`` is one of the prefix-qualified keys.

    Args:
        redis_url: Connection URL of the Redis instance used by the test.
    """
    key_prefix = "wids"
    ids = ["test_key_1", "test_key_2", "test_key_3"]
    # The test expects returned ids in the "<key_prefix>:<id>" form.
    keys = [f"{key_prefix}:{doc_id}" for doc_id in ids]
    texts = [
        "The quick brown fox jumps over the lazy dog",
        "The lazy dog is jumped over by the quick brown fox",
        "The fox is quick and brown, and jumps over dogs",
    ]
    docs = [Document(page_content=t) for t in texts]

    # One embeddings instance serves both indexing and the query embedding.
    embeddings = OpenAIEmbeddings()

    index_name = f"test_index_{str(ULID())}"
    result = RedisVectorStore.from_documents(
        docs,
        embeddings,
        index_name=index_name,
        key_prefix=key_prefix,
        keys=ids,
        return_keys=True,
        redis_url=redis_url,
        storage_type="json",
    )
    # With return_keys=True the call yields a (store, keys) pair.
    vector_store, _ = cast(Tuple[RedisVectorStore, List[str]], result)

    try:
        # Perform similarity search with return_all=True
        query_embedding = embeddings.embed_query("quick fox")
        results_with_return_all = vector_store.similarity_search_by_vector(
            query_embedding, k=2, return_all=True
        )

        assert len(results_with_return_all) == 2
        for doc in results_with_return_all:
            assert doc.page_content in texts
            assert doc.id in keys
    finally:
        # Clean up even when an assertion fails, so the index is not left
        # behind in Redis for later test runs.
        vector_store.index.delete(drop=True)

Expand Down

0 comments on commit c77be6d

Please sign in to comment.