Skip to content

Commit 039f8f1

Browse files
Donger, caidong, and dev2049
authored
Add support for SSL certificates and username/password authentication for Elasticsearch (langchain-ai#5058)
Enhance the code to support SSL authentication for Elasticsearch when using the VectorStore module; previous versions did not provide this capability. @dev2049

---------

Co-authored-by: caidong <zhucaidong1992@gmail.com>
Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
1 parent 44dc959 commit 039f8f1

File tree

2 files changed

+42
-51
lines changed

2 files changed

+42
-51
lines changed

langchain/vectorstores/elastic_vector_search.py

Lines changed: 25 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from langchain.docstore.document import Document
99
from langchain.embeddings.base import Embeddings
10-
from langchain.utils import get_from_dict_or_env
10+
from langchain.utils import get_from_env
1111
from langchain.vectorstores.base import VectorStore
1212

1313

@@ -114,24 +114,31 @@ class ElasticVectorSearch(VectorStore, ABC):
114114
ValueError: If the elasticsearch python package is not installed.
115115
"""
116116

117-
def __init__(self, elasticsearch_url: str, index_name: str, embedding: Embeddings):
117+
def __init__(
118+
self,
119+
elasticsearch_url: str,
120+
index_name: str,
121+
embedding: Embeddings,
122+
*,
123+
ssl_verify: Optional[Dict[str, Any]] = None,
124+
):
118125
"""Initialize with necessary components."""
119126
try:
120127
import elasticsearch
121128
except ImportError:
122-
raise ValueError(
129+
raise ImportError(
123130
"Could not import elasticsearch python package. "
124131
"Please install it with `pip install elasticsearch`."
125132
)
126133
self.embedding = embedding
127134
self.index_name = index_name
135+
_ssl_verify = ssl_verify or {}
128136
try:
129-
es_client = elasticsearch.Elasticsearch(elasticsearch_url) # noqa
137+
self.client = elasticsearch.Elasticsearch(elasticsearch_url, **_ssl_verify)
130138
except ValueError as e:
131139
raise ValueError(
132-
f"Your elasticsearch client string is misformatted. Got error: {e} "
140+
f"Your elasticsearch client string is mis-formatted. Got error: {e} "
133141
)
134-
self.client = es_client
135142

136143
def add_texts(
137144
self,
@@ -154,7 +161,7 @@ def add_texts(
154161
from elasticsearch.exceptions import NotFoundError
155162
from elasticsearch.helpers import bulk
156163
except ImportError:
157-
raise ValueError(
164+
raise ImportError(
158165
"Could not import elasticsearch python package. "
159166
"Please install it with `pip install elasticsearch`."
160167
)
@@ -239,6 +246,9 @@ def from_texts(
239246
texts: List[str],
240247
embedding: Embeddings,
241248
metadatas: Optional[List[dict]] = None,
249+
elasticsearch_url: Optional[str] = None,
250+
index_name: Optional[str] = None,
251+
refresh_indices: bool = True,
242252
**kwargs: Any,
243253
) -> ElasticVectorSearch:
244254
"""Construct ElasticVectorSearch wrapper from raw documents.
@@ -262,48 +272,12 @@ def from_texts(
262272
elasticsearch_url="http://localhost:9200"
263273
)
264274
"""
265-
elasticsearch_url = get_from_dict_or_env(
266-
kwargs, "elasticsearch_url", "ELASTICSEARCH_URL"
275+
elasticsearch_url = elasticsearch_url or get_from_env(
276+
"elasticsearch_url", "ELASTICSEARCH_URL"
267277
)
268-
try:
269-
import elasticsearch
270-
from elasticsearch.exceptions import NotFoundError
271-
from elasticsearch.helpers import bulk
272-
except ImportError:
273-
raise ValueError(
274-
"Could not import elasticsearch python package. "
275-
"Please install it with `pip install elasticsearch`."
276-
)
277-
try:
278-
client = elasticsearch.Elasticsearch(elasticsearch_url)
279-
except ValueError as e:
280-
raise ValueError(
281-
"Your elasticsearch client string is misformatted. " f"Got error: {e} "
282-
)
283-
index_name = kwargs.get("index_name", uuid.uuid4().hex)
284-
embeddings = embedding.embed_documents(texts)
285-
dim = len(embeddings[0])
286-
mapping = _default_text_mapping(dim)
287-
288-
# check to see if the index already exists
289-
try:
290-
client.indices.get(index=index_name)
291-
except NotFoundError:
292-
# TODO would be nice to create index before embedding,
293-
# just to save expensive steps for last
294-
client.indices.create(index=index_name, mappings=mapping)
295-
296-
requests = []
297-
for i, text in enumerate(texts):
298-
metadata = metadatas[i] if metadatas else {}
299-
request = {
300-
"_op_type": "index",
301-
"_index": index_name,
302-
"vector": embeddings[i],
303-
"text": text,
304-
"metadata": metadata,
305-
}
306-
requests.append(request)
307-
bulk(client, requests)
308-
client.indices.refresh(index=index_name)
309-
return cls(elasticsearch_url, index_name, embedding)
278+
index_name = index_name or uuid.uuid4().hex
279+
vectorsearch = cls(elasticsearch_url, index_name, embedding, **kwargs)
280+
vectorsearch.add_texts(
281+
texts, metadatas=metadatas, refresh_indices=refresh_indices
282+
)
283+
return vectorsearch

tests/integration_tests/vectorstores/test_elasticsearch.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,23 @@ def test_similarity_search_without_metadata(self, elasticsearch_url: str) -> Non
4848
output = docsearch.similarity_search("foo", k=1)
4949
assert output == [Document(page_content="foo")]
5050

51+
def test_similarity_search_with_ssl_verify(self, elasticsearch_url: str) -> None:
52+
"""Test end to end construction and search with ssl verify."""
53+
ssl_verify = {
54+
"verify_certs": True,
55+
"basic_auth": ("ES_USER", "ES_PASSWORD"),
56+
"ca_certs": "ES_CA_CERTS_PATH",
57+
}
58+
texts = ["foo", "bar", "baz"]
59+
docsearch = ElasticVectorSearch.from_texts(
60+
texts,
61+
FakeEmbeddings(),
62+
elasticsearch_url=elasticsearch_url,
63+
ssl_verify=ssl_verify,
64+
)
65+
output = docsearch.similarity_search("foo", k=1)
66+
assert output == [Document(page_content="foo")]
67+
5168
def test_similarity_search_with_metadata(self, elasticsearch_url: str) -> None:
5269
"""Test end to end construction and search with metadata."""
5370
texts = ["foo", "bar", "baz"]

0 commit comments

Comments
 (0)