forked from langchain4j/langchain4j
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Integration with MongoDB (langchain4j#535)
See the original PR langchain4j#254. There are four main differences: 1. `Testcontainers` and a MongoDB Atlas local deployment are used for testing. 2. The collection and index are created when the `MongoDBEmbeddingStore` is initialized, rather than when the first embedding is added. 3. `BsonUtils` is replaced by `org.bson.Document` for creating the index mapping. 4. `langchain4j-mongodb` is renamed to `langchain4j-mongodb-atlas`. All local deployment tests pass, but the cloud tests have not been run yet because I encountered network problems when communicating with MongoDB Atlas. (I think this doesn't matter, because a local deployment behaves the same as the cloud one; the purpose of the local deployment is development and testing.)
- Loading branch information
Showing
12 changed files
with
879 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!-- Maven build descriptor for the langchain4j-mongodb-atlas module (MongoDB Atlas integration). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
<!-- Inherits from langchain4j-parent, located via the relative path below. -->
<parent> | ||
<groupId>dev.langchain4j</groupId> | ||
<artifactId>langchain4j-parent</artifactId> | ||
<version>0.27.0-SNAPSHOT</version> | ||
<relativePath>../langchain4j-parent/pom.xml</relativePath> | ||
</parent> | ||
|
||
<artifactId>langchain4j-mongodb-atlas</artifactId> | ||
<packaging>jar</packaging> | ||
|
||
<name>LangChain4j :: Integration :: MongoDB Atlas</name> | ||
|
||
<!-- Most dependencies below declare no version; presumably those versions are managed by the parent POM (confirm there). -->
<dependencies> | ||
<dependency> | ||
<groupId>dev.langchain4j</groupId> | ||
<artifactId>langchain4j-core</artifactId> | ||
</dependency> | ||
|
||
<!-- The only dependency whose version is pinned directly in this file. -->
<dependency> | ||
<groupId>org.mongodb</groupId> | ||
<artifactId>mongodb-driver-sync</artifactId> | ||
<version>4.11.1</version> | ||
</dependency> | ||
|
||
<!-- "provided" scope: on the compile classpath but not passed on to consumers of this artifact. -->
<dependency> | ||
<groupId>org.projectlombok</groupId> | ||
<artifactId>lombok</artifactId> | ||
<scope>provided</scope> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.slf4j</groupId> | ||
<artifactId>slf4j-api</artifactId> | ||
</dependency> | ||
|
||
<!-- Everything from here down is test-scoped. -->
<!-- The test-jar (classifier "tests") of langchain4j-core. -->
<dependency> | ||
<groupId>dev.langchain4j</groupId> | ||
<artifactId>langchain4j-core</artifactId> | ||
<classifier>tests</classifier> | ||
<type>test-jar</type> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.junit.jupiter</groupId> | ||
<artifactId>junit-jupiter-engine</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.assertj</groupId> | ||
<artifactId>assertj-core</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>dev.langchain4j</groupId> | ||
<artifactId>langchain4j-embeddings-all-minilm-l6-v2-q</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
<!-- Testcontainers: JUnit Jupiter integration plus the MongoDB module. -->
<dependency> | ||
<groupId>org.testcontainers</groupId> | ||
<artifactId>junit-jupiter</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.testcontainers</groupId> | ||
<artifactId>mongodb</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
<!-- tinylog implementation and its SLF4J binding, used for logging during tests. -->
<dependency> | ||
<groupId>org.tinylog</groupId> | ||
<artifactId>tinylog-impl</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.tinylog</groupId> | ||
<artifactId>slf4j-tinylog</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
</dependencies> | ||
|
||
</project> |
26 changes: 26 additions & 0 deletions
26
...n4j-mongodb-atlas/src/main/java/dev/langchain4j/store/embedding/mongodb/IndexMapping.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package dev.langchain4j.store.embedding.mongodb; | ||
|
||
import lombok.AllArgsConstructor; | ||
import lombok.Builder; | ||
import lombok.Data; | ||
import lombok.NoArgsConstructor; | ||
|
||
import java.util.HashSet; | ||
import java.util.Set; | ||
|
||
@Data | ||
@NoArgsConstructor | ||
@AllArgsConstructor | ||
@Builder | ||
public class IndexMapping { | ||
|
||
private int dimension; | ||
private Set<String> metadataFieldNames; | ||
|
||
public static IndexMapping defaultIndexMapping() { | ||
return IndexMapping.builder() | ||
.dimension(1536) | ||
.metadataFieldNames(new HashSet<>()) | ||
.build(); | ||
} | ||
} |
77 changes: 77 additions & 0 deletions
77
...n4j-mongodb-atlas/src/main/java/dev/langchain4j/store/embedding/mongodb/MappingUtils.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
package dev.langchain4j.store.embedding.mongodb; | ||
|
||
import dev.langchain4j.data.document.Metadata; | ||
import dev.langchain4j.data.embedding.Embedding; | ||
import dev.langchain4j.data.segment.TextSegment; | ||
import dev.langchain4j.store.embedding.EmbeddingMatch; | ||
import org.bson.Document; | ||
|
||
import java.util.Set; | ||
|
||
class MappingUtils { | ||
|
||
private MappingUtils() throws InstantiationException { | ||
throw new InstantiationException("can't instantiate this class"); | ||
} | ||
|
||
static MongoDbDocument toMongoDbDocument(String id, Embedding embedding, TextSegment textSegment) { | ||
if (textSegment == null) { | ||
return new MongoDbDocument(id, embedding.vectorAsList(), null, null); | ||
} | ||
return new MongoDbDocument(id, embedding.vectorAsList(), textSegment.text(), textSegment.metadata().asMap()); | ||
} | ||
|
||
static EmbeddingMatch<TextSegment> toEmbeddingMatch(MongoDbMatchedDocument matchedDocument) { | ||
TextSegment textSegment = null; | ||
if (matchedDocument.getText() != null) { | ||
textSegment = matchedDocument.getMetadata() == null ? TextSegment.from(matchedDocument.getText()) : | ||
TextSegment.from(matchedDocument.getText(), Metadata.from(matchedDocument.getMetadata())); | ||
} | ||
return new EmbeddingMatch<>(matchedDocument.getScore(), matchedDocument.getId(), Embedding.from(matchedDocument.getEmbedding()), textSegment); | ||
} | ||
|
||
static Document fromIndexMapping(IndexMapping indexMapping) { | ||
Document mapping = new Document(); | ||
mapping.append("dynamic", false); | ||
|
||
Document fields = new Document(); | ||
writeEmbedding(indexMapping.getDimension(), fields); | ||
|
||
Set<String> metadataFields = indexMapping.getMetadataFieldNames(); | ||
if (metadataFields != null && !metadataFields.isEmpty()) { | ||
writeMetadata(metadataFields, fields); | ||
} | ||
|
||
mapping.append("fields", fields); | ||
|
||
return new Document("mappings", mapping); | ||
} | ||
|
||
private static void writeMetadata(Set<String> metadataFields, Document fields) { | ||
Document metadata = new Document(); | ||
metadata.append("dynamic", false); | ||
metadata.append("type", "document"); | ||
|
||
Document metadataFieldDoc = new Document(); | ||
metadataFields.forEach(field -> writeMetadataField(metadataFieldDoc, field)); | ||
|
||
metadata.append("fields", metadataFieldDoc); | ||
|
||
fields.append("metadata", metadata); | ||
} | ||
|
||
private static void writeMetadataField(Document metadataFieldDoc, String fieldName) { | ||
Document field = new Document(); | ||
field.append("type", "token"); | ||
metadataFieldDoc.append(fieldName, field); | ||
} | ||
|
||
private static void writeEmbedding(int dimensions, Document fields) { | ||
Document embedding = new Document(); | ||
embedding.append("dimensions", dimensions); | ||
embedding.append("similarity", "cosine"); | ||
embedding.append("type", "knnVector"); | ||
|
||
fields.append("embedding", embedding); | ||
} | ||
} |
23 changes: 23 additions & 0 deletions
23
...-mongodb-atlas/src/main/java/dev/langchain4j/store/embedding/mongodb/MongoDbDocument.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package dev.langchain4j.store.embedding.mongodb; | ||
|
||
import lombok.AllArgsConstructor; | ||
import lombok.Builder; | ||
import lombok.Data; | ||
import lombok.NoArgsConstructor; | ||
import org.bson.codecs.pojo.annotations.BsonId; | ||
|
||
import java.util.List; | ||
import java.util.Map; | ||
|
||
/**
 * Data holder for one stored entry: an id, the embedding vector, and the
 * optional text and metadata of the associated segment.
 *
 * Accessors, constructors and the builder are generated by Lombok.
 * Field order matters: it defines the parameter order of the all-args
 * constructor (id, embedding, text, metadata).
 */
@Data | ||
@NoArgsConstructor | ||
@AllArgsConstructor | ||
@Builder | ||
public class MongoDbDocument { | ||
|
||
// Annotated as the BSON id property of this POJO.
@BsonId | ||
private String id; | ||
// Embedding vector as a list of floats.
private List<Float> embedding; | ||
// Segment text; MappingUtils.toMongoDbDocument passes null when there is no text segment.
private String text; | ||
// Segment metadata as string key/value pairs; also null when there is no text segment.
private Map<String, String> metadata; | ||
} |
Oops, something went wrong.