-
-
Notifications
You must be signed in to change notification settings - Fork 89
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(vector-stores): Add support for Chroma VectorStore (#139)
- Loading branch information
1 parent
5fdcbc5
commit 098783b
Showing
13 changed files
with
396 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,4 @@ | ||
/// Chroma module for LangChain.dart. | ||
/// LangChain.dart integration module for Chroma open-source embedding database. | ||
library; | ||
|
||
export 'src/vector_stores/vector_stores.dart'; |
187 changes: 187 additions & 0 deletions
187
packages/langchain_chroma/lib/src/vector_stores/chroma.dart
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
import 'package:chromadb/chromadb.dart'; | ||
import 'package:http/http.dart' as http; | ||
import 'package:langchain/langchain.dart'; | ||
import 'package:uuid/uuid.dart'; | ||
|
||
import 'models/models.dart'; | ||
|
||
/// {@template chroma}
/// Vector store for Chroma open-source embedding database.
///
/// Chroma documentation:
/// https://docs.trychroma.com
///
/// This vector store requires Chroma to be running in client/server mode.
///
/// The server can run on your local computer via docker or be easily deployed
/// to any cloud provider.
///
/// To run Chroma in client server mode, run the docker container:
/// ```
/// docker-compose up -d --build
/// ```
///
/// By default, the Chroma client will connect to a server running on
/// `http://localhost:8000`. To connect to a different server, pass the
/// `host` parameter to the constructor.
///
/// ### Collections
///
/// Chroma lets you manage collections of embeddings, using the collection
/// primitive.
///
/// You can configure the collection to use in the [collectionName] parameter.
///
/// You can also configure the metadata to associate with the collection in the
/// [collectionMetadata] parameter.
///
/// ### Changing the distance function
///
/// You can change the distance function of the embedding space by setting the
/// value of `hnsw:space` in [collectionMetadata]. Valid options are "l2",
/// "ip", or "cosine". The default is "l2".
///
/// ### Filtering
///
/// Chroma supports filtering queries by metadata and document contents.
/// The `where` filter is used to filter by metadata, and the `whereDocument`
/// filter is used to filter by document contents.
///
/// For example:
/// ```dart
/// final vectorStore = Chroma(...);
/// final res = await vectorStore.similaritySearch(
///   query: 'What should I feed my cat?',
///   config: ChromaSimilaritySearch(
///     k: 5,
///     scoreThreshold: 0.8,
///     where: {'class': 'cat'},
///   ),
/// );
/// ```
///
/// Chroma supports a wide range of operators for filtering. Check out the
/// filtering section of the Chroma docs for more info:
/// https://docs.trychroma.com/usage-guide?lang=js#using-where-filters
/// {@endtemplate}
class Chroma extends VectorStore {
  /// {@macro chroma}
  Chroma({
    this.collectionName = 'langchain',
    this.collectionMetadata,
    required super.embeddings,
    final String? host,
    final http.Client? client,
  }) : _client = ChromaClient(
          host: host ?? 'http://localhost:8000',
          client: client,
        );

  /// Name of the collection to use.
  final String collectionName;

  /// Metadata to associate with the collection.
  final Map<String, dynamic>? collectionMetadata;

  /// The Chroma client.
  final ChromaClient _client;

  /// A UUID generator.
  final Uuid _uuid = const Uuid();

  /// The collection to use.
  ///
  /// Resolved on first use by [_getCollection] and reused afterwards.
  Collection? _collection;

  /// Upserts [documents] and their [vectors] into the collection.
  ///
  /// `vectors[i]` is stored as the embedding of `documents[i]`. A document
  /// without an id gets a freshly generated UUID v4.
  ///
  /// Returns the ids of the upserted entries, in the order of [documents].
  ///
  /// Throws an [ArgumentError] if [vectors] and [documents] differ in length.
  @override
  Future<List<String>> addVectors({
    required final List<List<double>> vectors,
    required final List<Document> documents,
  }) async {
    // Checked explicitly rather than with `assert`, which is stripped outside
    // debug mode and would let mismatched input reach the server.
    if (vectors.length != documents.length) {
      throw ArgumentError(
        'vectors (${vectors.length}) and documents (${documents.length}) '
        'must have the same length',
      );
    }

    final collection = await _getCollection();

    final List<String> ids = [
      for (final doc in documents) doc.id ?? _uuid.v4(),
    ];
    final List<Map<String, dynamic>> metadatas = [
      for (final doc in documents) doc.metadata,
    ];
    final List<String> docs = [
      for (final doc in documents) doc.pageContent,
    ];

    await collection.upsert(
      ids: ids,
      embeddings: vectors,
      metadatas: metadatas,
      documents: docs,
    );
    return ids;
  }

  /// Deletes the entries with the given [ids] from the collection.
  @override
  Future<void> delete({
    required final List<String> ids,
  }) async {
    final collection = await _getCollection();
    await collection.delete(ids: ids);
  }

  /// Queries the collection for the entries closest to [embedding].
  ///
  /// `config.k` is used as the number of results and `config.filter` as the
  /// metadata `where` filter. The `whereDocument` filter is only applied when
  /// [config] is a [ChromaSimilaritySearch].
  ///
  /// Returns each matching [Document] paired with the distance reported by
  /// Chroma (`0.0` when no distance is returned for an entry). Entries whose
  /// distance is below `config.scoreThreshold` are skipped.
  @override
  Future<List<(Document, double)>> similaritySearchByVectorWithScores({
    required final List<double> embedding,
    final VectorStoreSimilaritySearch config =
        const VectorStoreSimilaritySearch(),
  }) async {
    final collection = await _getCollection();
    final result = await collection.query(
      queryEmbeddings: [embedding],
      nResults: config.k,
      where: config.filter,
      whereDocument:
          config is ChromaSimilaritySearch ? config.whereDocument : null,
      include: const [
        Include.documents,
        Include.metadatas,
        Include.distances,
      ],
    );

    // A single query embedding is sent, so only the first result set matters.
    final ids = result.ids.first;
    final metadatas = result.metadatas?.first;
    final docs = result.documents?.first;
    final distances = result.distances?.first;

    // Copied to a local so the null check promotes it (no `!` needed).
    final scoreThreshold = config.scoreThreshold;

    final List<(Document, double)> results = [];
    for (var i = 0; i < ids.length; i++) {
      final distance = distances?[i] ?? 0.0;
      // NOTE(review): the value compared here is a distance, so this drops
      // the closest matches rather than the farthest ones. Confirm whether
      // the threshold is meant to be a minimum distance or a minimum
      // similarity before changing it; existing behaviour is kept as is.
      if (scoreThreshold != null && distance < scoreThreshold) {
        continue;
      }

      final doc = Document(
        id: ids[i],
        metadata: metadatas?[i] ?? {},
        pageContent: docs?[i] ?? '',
      );
      results.add((doc, distance));
    }
    return results;
  }

  /// Returns the collection named [collectionName], fetching or creating it
  /// (with [collectionMetadata]) on first use and caching it afterwards.
  Future<Collection> _getCollection() async {
    return _collection ??= await _client.getOrCreateCollection(
      name: collectionName,
      metadata: collectionMetadata,
    );
  }
}
33 changes: 33 additions & 0 deletions
33
packages/langchain_chroma/lib/src/vector_stores/models/models.dart
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import 'package:langchain/langchain.dart'; | ||
|
||
/// {@template chroma_similarity_search}
/// Chroma similarity search config.
///
/// Chroma supports filtering queries by metadata and document contents.
/// The `where` filter is used to filter by metadata, and the [whereDocument]
/// filter is used to filter by document contents.
///
/// The `where` argument is forwarded to the base class as its `filter`.
///
/// Check out the filtering section of the Chroma docs for more info:
/// https://docs.trychroma.com/usage-guide?lang=js#using-where-filters
///
/// Example:
/// ```dart
/// ChromaSimilaritySearch(
///   k: 5,
///   where: {'style': 'style1'},
///   scoreThreshold: 0.8,
/// ),
/// ```
/// {@endtemplate}
class ChromaSimilaritySearch extends VectorStoreSimilaritySearch {
  /// {@macro chroma_similarity_search}
  ///
  /// `k` defaults to 4 when not provided.
  const ChromaSimilaritySearch({
    super.k = 4,
    final Map<String, dynamic>? where,
    this.whereDocument,
    super.scoreThreshold,
  }) : super(filter: where);

  /// Optional query condition to filter results based on document content.
  final Map<String, dynamic>? whereDocument;
}
2 changes: 2 additions & 0 deletions
2
packages/langchain_chroma/lib/src/vector_stores/vector_stores.dart
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
export 'chroma.dart'; | ||
export 'models/models.dart'; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,21 @@ | ||
name: langchain_chroma | ||
description: Chroma module for LangChain.dart. | ||
description: LangChain.dart integration module for Chroma open-source embedding database. | ||
version: 0.0.1-dev.1 | ||
repository: https://github.com/davidmigloz/langchain_dart/tree/main/packages/langchain_chroma | ||
issue_tracker: https://github.com/davidmigloz/langchain_dart/issues | ||
homepage: https://github.com/davidmigloz/langchain_dart | ||
documentation: https://langchaindart.com | ||
publish_to: none # Remove when the package is ready to be published | ||
|
||
environment: | ||
sdk: ">=3.0.0 <4.0.0" | ||
|
||
dependencies: | ||
chromadb: ^0.0.1-dev.1 | ||
http: ^1.1.0 | ||
langchain: ^0.0.9 | ||
meta: ^1.9.1 | ||
uuid: ^3.0.7 | ||
|
||
dev_dependencies: | ||
test: ^1.24.3 | ||
langchain_openai: ^0.0.9 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,8 @@ | ||
# melos_managed_dependency_overrides: langchain | ||
# melos_managed_dependency_overrides: langchain,chromadb,langchain_openai | ||
dependency_overrides: | ||
chromadb: | ||
path: ../chromadb | ||
langchain: | ||
path: ../langchain | ||
langchain_openai: | ||
path: ../langchain_openai |
Oops, something went wrong.