Skip to content

Commit

Permalink
Integration with MongoDB (langchain4j#535)
Browse files Browse the repository at this point in the history
See the original PR langchain4j#254. There are four main differences:

1. Use `Testcontainers` and a MongoDB Atlas Local Deployment for testing.
2. Create the collection and index when the `MongoDBEmbeddingStore`
is initialized, rather than when the first embedding is
added.
3. Replace `BsonUtils` with `org.bson.Document` for
creating the index mapping.
4. Rename `langchain4j-mongodb` to `langchain4j-mongodb-atlas`

All local deployment tests pass, but the cloud tests have not been run
yet because I encountered network problems when communicating with
MongoDB Atlas. (I don't think this matters, because a local deployment
behaves the same as the cloud one; the purpose of a local deployment is
development and testing.)
  • Loading branch information
Martin7-1 authored Feb 8, 2024
1 parent dc4028b commit c694755
Show file tree
Hide file tree
Showing 12 changed files with 879 additions and 0 deletions.
6 changes: 6 additions & 0 deletions langchain4j-bom/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,12 @@
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-mongodb-atlas</artifactId>
<version>${project.version}</version>
</dependency>

<!-- code execution engines -->

<dependency>
Expand Down
92 changes: 92 additions & 0 deletions langchain4j-mongodb-atlas/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the langchain4j-mongodb-atlas integration module. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Inherits from the shared langchain4j parent POM, resolved via a relative path. -->
<parent>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-parent</artifactId>
<version>0.27.0-SNAPSHOT</version>
<relativePath>../langchain4j-parent/pom.xml</relativePath>
</parent>

<artifactId>langchain4j-mongodb-atlas</artifactId>
<packaging>jar</packaging>

<name>LangChain4j :: Integration :: MongoDB Atlas</name>

<dependencies>
<!-- Main (non-test) dependencies. Entries without a <version> presumably get it from the parent POM; verify there. -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-core</artifactId>
</dependency>

<!-- The MongoDB driver is the only dependency whose version is pinned in this file. -->
<dependency>
<groupId>org.mongodb</groupId>
<artifactId>mongodb-driver-sync</artifactId>
<version>4.11.1</version>
</dependency>

<!-- Lombok uses the provided scope, so it is available for compilation but not passed on transitively. -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<scope>provided</scope>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>

<!-- Test-scoped dependencies start here. -->
<!-- Test classes published by langchain4j-core (test-jar with the "tests" classifier). -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-core</artifactId>
<classifier>tests</classifier>
<type>test-jar</type>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-embeddings-all-minilm-l6-v2-q</artifactId>
<scope>test</scope>
</dependency>

<!-- Testcontainers: JUnit 5 integration plus the MongoDB container module. -->
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>mongodb</artifactId>
<scope>test</scope>
</dependency>

<!-- tinylog implementation and its SLF4J binding, used for logging during tests only. -->
<dependency>
<groupId>org.tinylog</groupId>
<artifactId>tinylog-impl</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.tinylog</groupId>
<artifactId>slf4j-tinylog</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package dev.langchain4j.store.embedding.mongodb;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.util.HashSet;
import java.util.Set;

@Data
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class IndexMapping {

private int dimension;
private Set<String> metadataFieldNames;

public static IndexMapping defaultIndexMapping() {
return IndexMapping.builder()
.dimension(1536)
.metadataFieldNames(new HashSet<>())
.build();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package dev.langchain4j.store.embedding.mongodb;

import dev.langchain4j.data.document.Metadata;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.store.embedding.EmbeddingMatch;
import org.bson.Document;

import java.util.Set;

/**
 * Package-private helpers that convert between LangChain4j types
 * ({@link Embedding}, {@link TextSegment}, {@link EmbeddingMatch}) and the
 * document types used by this module, and that render an {@link IndexMapping}
 * as a BSON {@link Document}.
 */
class MappingUtils {

    /**
     * Utility class; never instantiated.
     *
     * @throws InstantiationException always
     */
    private MappingUtils() throws InstantiationException {
        throw new InstantiationException("can't instantiate this class");
    }

    /**
     * Builds the document to store for one embedding.
     *
     * @param id          identifier of the stored document
     * @param embedding   embedding whose vector is stored as a list
     * @param textSegment optional segment; when {@code null}, text and metadata are stored as {@code null}
     * @return the document to store
     */
    static MongoDbDocument toMongoDbDocument(String id, Embedding embedding, TextSegment textSegment) {
        if (textSegment != null) {
            return new MongoDbDocument(id, embedding.vectorAsList(), textSegment.text(), textSegment.metadata().asMap());
        }
        return new MongoDbDocument(id, embedding.vectorAsList(), null, null);
    }

    /**
     * Converts a matched document into an {@link EmbeddingMatch}.
     *
     * @param matchedDocument the document returned for a match
     * @return a match carrying score, id, embedding and, if text is present, the text segment
     */
    static EmbeddingMatch<TextSegment> toEmbeddingMatch(MongoDbMatchedDocument matchedDocument) {
        // Resolve the segment before anything else so evaluation order stays text-first.
        TextSegment segment = restoreTextSegment(matchedDocument);
        return new EmbeddingMatch<>(
                matchedDocument.getScore(),
                matchedDocument.getId(),
                Embedding.from(matchedDocument.getEmbedding()),
                segment);
    }

    /**
     * Rebuilds the text segment of a matched document.
     *
     * @return {@code null} when the document has no text; otherwise a segment
     *         with metadata attached only if the document carries metadata
     */
    private static TextSegment restoreTextSegment(MongoDbMatchedDocument matchedDocument) {
        if (matchedDocument.getText() == null) {
            return null;
        }
        if (matchedDocument.getMetadata() == null) {
            return TextSegment.from(matchedDocument.getText());
        }
        return TextSegment.from(matchedDocument.getText(), Metadata.from(matchedDocument.getMetadata()));
    }

    /**
     * Renders an index mapping as a BSON document of the form
     * {@code {mappings: {dynamic: false, fields: {embedding: ..., metadata: ...}}}}.
     * The {@code metadata} entry is written only when at least one metadata field name is configured.
     *
     * @param indexMapping the mapping to render
     * @return the BSON document wrapping the mapping under the {@code mappings} key
     */
    static Document fromIndexMapping(IndexMapping indexMapping) {
        Document fieldDefinitions = new Document();
        fieldDefinitions.append("embedding", embeddingField(indexMapping.getDimension()));

        Set<String> metadataNames = indexMapping.getMetadataFieldNames();
        boolean hasMetadataNames = !(metadataNames == null || metadataNames.isEmpty());
        if (hasMetadataNames) {
            fieldDefinitions.append("metadata", metadataField(metadataNames));
        }

        // Key order matches the original layout: "dynamic" first, then "fields".
        Document mappings = new Document("dynamic", false).append("fields", fieldDefinitions);
        return new Document("mappings", mappings);
    }

    /** Builds the non-dynamic {@code metadata} sub-document with one {@code token} entry per field name. */
    private static Document metadataField(Set<String> metadataFields) {
        Document perFieldDefinitions = new Document();
        for (String fieldName : metadataFields) {
            perFieldDefinitions.append(fieldName, new Document("type", "token"));
        }

        return new Document("dynamic", false)
                .append("type", "document")
                .append("fields", perFieldDefinitions);
    }

    /** Builds the {@code embedding} field definition: a {@code knnVector} with cosine similarity. */
    private static Document embeddingField(int dimensions) {
        return new Document("dimensions", dimensions)
                .append("similarity", "cosine")
                .append("type", "knnVector");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package dev.langchain4j.store.embedding.mongodb;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.bson.codecs.pojo.annotations.BsonId;

import java.util.List;
import java.util.Map;

/**
 * Document holding one embedding together with its optional text and metadata.
 *
 * <p>Accessors, constructors and the builder are generated by Lombok; the
 * all-args constructor takes the fields in declaration order
 * (id, embedding, text, metadata).
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class MongoDbDocument {

// Annotated with @BsonId so this field is used as the document identifier.
@BsonId
private String id;
// Embedding vector as a list of floats.
private List<Float> embedding;
// Text of the embedded segment; null when no text segment was supplied.
private String text;
// Metadata of the embedded segment as string key/value pairs; null when no text segment was supplied.
private Map<String, String> metadata;
}
Loading

0 comments on commit c694755

Please sign in to comment.