|
11 | 11 | Callable, |
12 | 12 | Dict, |
13 | 13 | Generator, |
14 | | - Iterable, |
15 | 14 | List, |
16 | 15 | Optional, |
17 | 16 | Sequence, |
|
27 | 26 | import sqlalchemy |
28 | 27 | from langchain_core.documents import Document |
29 | 28 | from langchain_core.embeddings import Embeddings |
| 29 | +from langchain_core.indexing import UpsertResponse |
30 | 30 | from langchain_core.utils import get_from_dict_or_env |
31 | 31 | from langchain_core.vectorstores import VectorStore |
32 | 32 | from sqlalchemy import SQLColumnExpression, cast, create_engine, delete, func, select |
@@ -714,7 +714,7 @@ async def __afrom( |
714 | 714 |
|
715 | 715 | def add_embeddings( |
716 | 716 | self, |
717 | | - texts: Iterable[str], |
| 717 | + texts: Sequence[str], |
718 | 718 | embeddings: List[List[float]], |
719 | 719 | metadatas: Optional[List[dict]] = None, |
720 | 720 | ids: Optional[List[str]] = None, |
@@ -770,7 +770,7 @@ def add_embeddings( |
770 | 770 |
|
771 | 771 | async def aadd_embeddings( |
772 | 772 | self, |
773 | | - texts: Iterable[str], |
| 773 | + texts: Sequence[str], |
774 | 774 | embeddings: List[List[float]], |
775 | 775 | metadatas: Optional[List[dict]] = None, |
776 | 776 | ids: Optional[List[str]] = None, |
@@ -824,56 +824,6 @@ async def aadd_embeddings( |
824 | 824 |
|
825 | 825 | return ids |
826 | 826 |
|
827 | | - def add_texts( |
828 | | - self, |
829 | | - texts: Iterable[str], |
830 | | - metadatas: Optional[List[dict]] = None, |
831 | | - ids: Optional[List[str]] = None, |
832 | | - **kwargs: Any, |
833 | | - ) -> List[str]: |
834 | | - """Run more texts through the embeddings and add to the vectorstore. |
835 | | -
|
836 | | - Args: |
837 | | - texts: Iterable of strings to add to the vectorstore. |
838 | | - metadatas: Optional list of metadatas associated with the texts. |
839 | | - ids: Optional list of ids for the texts. |
840 | | - If not provided, will generate a new id for each text. |
841 | | - kwargs: vectorstore specific parameters |
842 | | -
|
843 | | - Returns: |
844 | | - List of ids from adding the texts into the vectorstore. |
845 | | - """ |
846 | | - assert not self._async_engine, "This method must be called without async_mode" |
847 | | - embeddings = self.embedding_function.embed_documents(list(texts)) |
848 | | - return self.add_embeddings( |
849 | | - texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs |
850 | | - ) |
851 | | - |
852 | | - async def aadd_texts( |
853 | | - self, |
854 | | - texts: Iterable[str], |
855 | | - metadatas: Optional[List[dict]] = None, |
856 | | - ids: Optional[List[str]] = None, |
857 | | - **kwargs: Any, |
858 | | - ) -> List[str]: |
859 | | - """Run more texts through the embeddings and add to the vectorstore. |
860 | | -
|
861 | | - Args: |
862 | | - texts: Iterable of strings to add to the vectorstore. |
863 | | - metadatas: Optional list of metadatas associated with the texts. |
864 | | - ids: Optional list of ids for the texts. |
865 | | - If not provided, will generate a new id for each text. |
866 | | - kwargs: vectorstore specific parameters |
867 | | -
|
868 | | - Returns: |
869 | | - List of ids from adding the texts into the vectorstore. |
870 | | - """ |
871 | | - await self.__apost_init__() # Lazy async init |
872 | | - embeddings = await self.embedding_function.aembed_documents(list(texts)) |
873 | | - return await self.aadd_embeddings( |
874 | | - texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs |
875 | | - ) |
876 | | - |
877 | 827 | def similarity_search( |
878 | 828 | self, |
879 | 829 | query: str, |
@@ -1014,6 +964,7 @@ def _results_to_docs_and_scores(self, results: Any) -> List[Tuple[Document, floa |
1014 | 964 | docs = [ |
1015 | 965 | ( |
1016 | 966 | Document( |
| 967 | + id=str(result.EmbeddingStore.id), |
1017 | 968 | page_content=result.EmbeddingStore.document, |
1018 | 969 | metadata=result.EmbeddingStore.cmetadata, |
1019 | 970 | ), |
@@ -2178,3 +2129,112 @@ async def _make_async_session(self) -> AsyncGenerator[AsyncSession, None]: |
2178 | 2129 | ) |
2179 | 2130 | async with self.session_maker() as session: |
2180 | 2131 | yield typing_cast(AsyncSession, session) |
| 2132 | + |
| 2133 | + def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse: |
| 2134 | + """Upsert documents into the vectorstore. |
| 2135 | +
|
| 2136 | + Args: |
| 2137 | + items: Sequence of documents to upsert. |
| 2138 | + kwargs: vectorstore specific parameters |
| 2139 | +
|
| 2140 | + Returns: |
| 2141 | + UpsertResponse |
| 2142 | + """ |
| 2143 | + if self._async_engine: |
| 2144 | + raise AssertionError("This method must be called in sync mode.") |
| 2145 | + texts = [item.page_content for item in items] |
| 2146 | + metadatas = [item.metadata for item in items] |
| 2147 | + ids = [item.id if item.id is not None else str(uuid.uuid4()) for item in items] |
| 2148 | + embeddings = self.embedding_function.embed_documents(list(texts)) |
| 2149 | + added_ids = self.add_embeddings( |
| 2150 | + texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs |
| 2151 | + ) |
| 2152 | + return { |
| 2153 | + "succeeded": added_ids, |
| 2154 | + "failed": [ |
| 2155 | + item.id |
| 2156 | + for item in items |
| 2157 | + if item.id is not None and item.id not in added_ids |
| 2158 | + ], |
| 2159 | + } |
| 2160 | + |
| 2161 | + async def aupsert( |
| 2162 | + self, items: Sequence[Document], /, **kwargs: Any |
| 2163 | + ) -> UpsertResponse: |
| 2164 | + """Upsert documents into the vectorstore. |
| 2165 | +
|
| 2166 | + Args: |
| 2167 | + items: Sequence of documents to upsert. |
| 2168 | + kwargs: vectorstore specific parameters |
| 2169 | +
|
| 2170 | + Returns: |
| 2171 | + UpsertResponse |
| 2172 | + """ |
| 2173 | + if not self._async_engine: |
| 2174 | + raise AssertionError("This method must be called with async_mode") |
| 2175 | + texts = [item.page_content for item in items] |
| 2176 | + metadatas = [item.metadata for item in items] |
| 2177 | + ids = [item.id if item.id is not None else str(uuid.uuid4()) for item in items] |
| 2178 | + embeddings = await self.embedding_function.aembed_documents(list(texts)) |
| 2179 | + added_ids = await self.aadd_embeddings( |
| 2180 | + texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs |
| 2181 | + ) |
| 2182 | + return { |
| 2183 | + "succeeded": added_ids, |
| 2184 | + "failed": [ |
| 2185 | + item.id |
| 2186 | + for item in items |
| 2187 | + if item.id is not None and item.id not in added_ids |
| 2188 | + ], |
| 2189 | + } |
| 2190 | + |
| 2191 | + def get_by_ids(self, ids: Sequence[str], /) -> List[Document]: |
| 2192 | + """Get documents by ids.""" |
| 2193 | + documents = [] |
| 2194 | + with self._make_sync_session() as session: |
| 2195 | + collection = self.get_collection(session) |
| 2196 | + filter_by = [self.EmbeddingStore.collection_id == collection.uuid] |
| 2197 | + stmt = ( |
| 2198 | + select( |
| 2199 | + self.EmbeddingStore, |
| 2200 | + ) |
| 2201 | + .where(self.EmbeddingStore.id.in_(ids)) |
| 2202 | + .filter(*filter_by) |
| 2203 | + ) |
| 2204 | + |
| 2205 | + for result in session.execute(stmt).scalars().all(): |
| 2206 | + documents.append( |
| 2207 | + Document( |
| 2208 | + id=result.id, |
| 2209 | + page_content=result.document, |
| 2210 | + metadata=result.cmetadata, |
| 2211 | + ) |
| 2212 | + ) |
| 2213 | + return documents |
| 2214 | + |
| 2215 | + async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]: |
| 2216 | + """Get documents by ids.""" |
| 2217 | + documents = [] |
| 2218 | + async with self._make_async_session() as session: |
| 2219 | + collection = await self.aget_collection(session) |
| 2220 | + filter_by = [self.EmbeddingStore.collection_id == collection.uuid] |
| 2221 | + |
| 2222 | + stmt = ( |
| 2223 | + select( |
| 2224 | + self.EmbeddingStore, |
| 2225 | + ) |
| 2226 | + .where(self.EmbeddingStore.id.in_(ids)) |
| 2227 | + .filter(*filter_by) |
| 2228 | + ) |
| 2229 | + |
| 2230 | + results: Sequence[Any] = (await session.execute(stmt)).scalars().all() |
| 2231 | + |
| 2232 | + for result in results: |
| 2233 | + documents.append( |
| 2234 | + Document( |
| 2235 | + id=str(result.id), |
| 2236 | + page_content=result.document, |
| 2237 | + metadata=result.cmetadata, |
| 2238 | + ) |
| 2239 | + ) |
| 2240 | + return documents |
0 commit comments