7
7
8
8
from langchain .docstore .document import Document
9
9
from langchain .embeddings .base import Embeddings
10
- from langchain .utils import get_from_dict_or_env
10
+ from langchain .utils import get_from_env
11
11
from langchain .vectorstores .base import VectorStore
12
12
13
13
@@ -114,24 +114,31 @@ class ElasticVectorSearch(VectorStore, ABC):
114
114
ValueError: If the elasticsearch python package is not installed.
115
115
"""
116
116
117
- def __init__ (self , elasticsearch_url : str , index_name : str , embedding : Embeddings ):
117
+ def __init__ (
118
+ self ,
119
+ elasticsearch_url : str ,
120
+ index_name : str ,
121
+ embedding : Embeddings ,
122
+ * ,
123
+ ssl_verify : Optional [Dict [str , Any ]] = None ,
124
+ ):
118
125
"""Initialize with necessary components."""
119
126
try :
120
127
import elasticsearch
121
128
except ImportError :
122
- raise ValueError (
129
+ raise ImportError (
123
130
"Could not import elasticsearch python package. "
124
131
"Please install it with `pip install elasticsearch`."
125
132
)
126
133
self .embedding = embedding
127
134
self .index_name = index_name
135
+ _ssl_verify = ssl_verify or {}
128
136
try :
129
- es_client = elasticsearch .Elasticsearch (elasticsearch_url ) # noqa
137
+ self . client = elasticsearch .Elasticsearch (elasticsearch_url , ** _ssl_verify )
130
138
except ValueError as e :
131
139
raise ValueError (
132
- f"Your elasticsearch client string is misformatted . Got error: { e } "
140
+ f"Your elasticsearch client string is mis-formatted . Got error: { e } "
133
141
)
134
- self .client = es_client
135
142
136
143
def add_texts (
137
144
self ,
@@ -154,7 +161,7 @@ def add_texts(
154
161
from elasticsearch .exceptions import NotFoundError
155
162
from elasticsearch .helpers import bulk
156
163
except ImportError :
157
- raise ValueError (
164
+ raise ImportError (
158
165
"Could not import elasticsearch python package. "
159
166
"Please install it with `pip install elasticsearch`."
160
167
)
@@ -239,6 +246,9 @@ def from_texts(
239
246
texts : List [str ],
240
247
embedding : Embeddings ,
241
248
metadatas : Optional [List [dict ]] = None ,
249
+ elasticsearch_url : Optional [str ] = None ,
250
+ index_name : Optional [str ] = None ,
251
+ refresh_indices : bool = True ,
242
252
** kwargs : Any ,
243
253
) -> ElasticVectorSearch :
244
254
"""Construct ElasticVectorSearch wrapper from raw documents.
@@ -262,48 +272,12 @@ def from_texts(
262
272
elasticsearch_url="http://localhost:9200"
263
273
)
264
274
"""
265
- elasticsearch_url = get_from_dict_or_env (
266
- kwargs , "elasticsearch_url" , "ELASTICSEARCH_URL"
275
+ elasticsearch_url = elasticsearch_url or get_from_env (
276
+ "elasticsearch_url" , "ELASTICSEARCH_URL"
267
277
)
268
- try :
269
- import elasticsearch
270
- from elasticsearch .exceptions import NotFoundError
271
- from elasticsearch .helpers import bulk
272
- except ImportError :
273
- raise ValueError (
274
- "Could not import elasticsearch python package. "
275
- "Please install it with `pip install elasticsearch`."
276
- )
277
- try :
278
- client = elasticsearch .Elasticsearch (elasticsearch_url )
279
- except ValueError as e :
280
- raise ValueError (
281
- "Your elasticsearch client string is misformatted. " f"Got error: { e } "
282
- )
283
- index_name = kwargs .get ("index_name" , uuid .uuid4 ().hex )
284
- embeddings = embedding .embed_documents (texts )
285
- dim = len (embeddings [0 ])
286
- mapping = _default_text_mapping (dim )
287
-
288
- # check to see if the index already exists
289
- try :
290
- client .indices .get (index = index_name )
291
- except NotFoundError :
292
- # TODO would be nice to create index before embedding,
293
- # just to save expensive steps for last
294
- client .indices .create (index = index_name , mappings = mapping )
295
-
296
- requests = []
297
- for i , text in enumerate (texts ):
298
- metadata = metadatas [i ] if metadatas else {}
299
- request = {
300
- "_op_type" : "index" ,
301
- "_index" : index_name ,
302
- "vector" : embeddings [i ],
303
- "text" : text ,
304
- "metadata" : metadata ,
305
- }
306
- requests .append (request )
307
- bulk (client , requests )
308
- client .indices .refresh (index = index_name )
309
- return cls (elasticsearch_url , index_name , embedding )
278
+ index_name = index_name or uuid .uuid4 ().hex
279
+ vectorsearch = cls (elasticsearch_url , index_name , embedding , ** kwargs )
280
+ vectorsearch .add_texts (
281
+ texts , metadatas = metadatas , refresh_indices = refresh_indices
282
+ )
283
+ return vectorsearch
0 commit comments