Skip to content

Commit

Permalink
Integration with vearch (langchain4j#525)
Browse files Browse the repository at this point in the history
Supports langchain4j#425. Due to a problem with my local environment (the `vearch` Docker
container fails to start on Apple M1), I ran the integration tests against a remote
`vearch` instance (a `vearch` container started on a remote host using Docker),
and they work fine. (However, I have not verified starting it up with
`Testcontainers`.)

Two more things need discussion and your opinion:

1. There is a translation between `RelevantScore` and `CosineSimilarity`
in the `findRelevant` method. I don't know whether it is correct: because
`vearch` does not support cosine similarity, I use the inner product
instead (which equals cosine similarity when the vectors are normalized). Should we
normalize each vector before adding it to the embedding store?
2. There are many constraints when creating a `vearch` space (different retrieval types
take different parameters). Should we validate them, or just let users
check for themselves? (see [Create
Space](https://vearch.readthedocs.io/en/latest/use_op/op_space.html#create-space)).
Currently I implement this with several static nested classes (see
`RetrievalParam` and `RetrievalType`); `SpaceEngine` performs some
constraint checks.
  • Loading branch information
Martin7-1 authored Jan 29, 2024
1 parent 53c7140 commit babd64a
Show file tree
Hide file tree
Showing 32 changed files with 1,643 additions and 0 deletions.
18 changes: 18 additions & 0 deletions langchain4j-bom/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,12 @@
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-vearch</artifactId>
<version>${project.version}</version>
</dependency>

<!-- code execution engines -->

<dependency>
Expand All @@ -207,6 +213,18 @@
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-document-loader-github</artifactId>
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-document-loader-azure-storage-blob</artifactId>
<version>${project.version}</version>
</dependency>

<!-- document parsers -->

<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,21 @@ public List<Float> vectorAsList() {
return list;
}

/**
 * Normalizes this embedding's vector in place so that it has unit Euclidean (L2) length.
 * <p>
 * Each component is divided by the Euclidean norm of the vector. A vector whose norm is
 * zero (an empty vector, or one whose components are all zero) is left unchanged, because
 * dividing by a zero norm would turn every component into {@code NaN}.
 */
public void normalize() {
    double sumOfSquares = 0.0;
    for (float component : vector) {
        // Square in double precision: a float product overflows to Infinity for
        // components above roughly 1.8e19, which would zero out the whole vector.
        sumOfSquares += (double) component * component;
    }

    double norm = Math.sqrt(sumOfSquares);
    if (norm == 0.0) {
        // Nothing to scale; avoids 0/0 == NaN.
        return;
    }

    for (int i = 0; i < vector.length; i++) {
        vector[i] /= norm;
    }
}

/**
* Returns the dimension of the vector.
* @return the dimension of the vector.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,13 @@ public void test_from() {
.isEqualTo(new Embedding(new float[]{1.0f, 2.0f, 3.0f}));
}

@Test
void test_normalize() {
    // (6, 8) has Euclidean norm 10, so its unit vector is (0.6, 0.8).
    Embedding actual = new Embedding(new float[]{6f, 8f});
    actual.normalize();

    assertThat(actual).isEqualTo(new Embedding(new float[]{0.6f, 0.8f}));
}

}
101 changes: 101 additions & 0 deletions langchain4j-vearch/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the langchain4j-vearch module (LangChain4j integration with Vearch). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Inherits from langchain4j-parent, resolved through the relative path below. -->
<parent>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-parent</artifactId>
<version>0.26.0-SNAPSHOT</version>
<relativePath>../langchain4j-parent/pom.xml</relativePath>
</parent>

<artifactId>langchain4j-vearch</artifactId>
<packaging>jar</packaging>

<name>LangChain4j integration with Vearch</name>

<!-- No dependency below declares a version; versions are presumably managed by the parent POM (confirm there). -->
<dependencies>
<!-- LangChain4j core API -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-core</artifactId>
</dependency>

<!-- HTTP client stack: Retrofit with its Gson converter, plus OkHttp -->
<dependency>
<groupId>com.squareup.retrofit2</groupId>
<artifactId>retrofit</artifactId>
</dependency>

<dependency>
<groupId>com.squareup.retrofit2</groupId>
<artifactId>converter-gson</artifactId>
</dependency>

<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
</dependency>

<!-- Lombok uses 'provided' scope: required at compile time, not a transitive runtime dependency -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<scope>provided</scope>
</dependency>

<!-- Logging facade -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>

<!-- Everything from here on is test-scoped -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>

<!-- Embedding model artifact made available to tests -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-embeddings-all-minilm-l6-v2-q</artifactId>
<scope>test</scope>
</dependency>

<!-- Test classes of langchain4j-core (classifier 'tests', type 'test-jar') -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-core</artifactId>
<classifier>tests</classifier>
<type>test-jar</type>
<scope>test</scope>
</dependency>

<!-- Testcontainers and its JUnit Jupiter integration -->
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>testcontainers</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>

<!-- Logging backend for tests: tinylog and its SLF4J binding -->
<dependency>
<groupId>org.tinylog</groupId>
<artifactId>tinylog-impl</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.tinylog</groupId>
<artifactId>slf4j-tinylog</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package dev.langchain4j.store.embedding.vearch;

import lombok.Builder;
import lombok.Getter;
import lombok.Setter;

import java.util.List;
import java.util.Map;

/**
 * Body of a bulk request: a batch of documents, each expressed as a map from
 * {@code String} keys to arbitrary values.
 * <p>
 * Getters, setters and the builder are generated by Lombok.
 */
@Builder
@Getter
@Setter
class BulkRequest {

    /** Documents in the batch, one map per document. */
    private List<Map<String, Object>> documents;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package dev.langchain4j.store.embedding.vearch;

import com.google.gson.annotations.SerializedName;
import lombok.Builder;
import lombok.Getter;
import lombok.Setter;

/**
 * A bulk response entry carrying a status, an error text and a document id.
 * <p>
 * Getters, setters and the builder are generated by Lombok.
 */
@Builder
@Getter
@Setter
class BulkResponse {

    /** Numeric status reported for the entry. */
    private Integer status;

    /** Error text reported for the entry. */
    private String error;

    /** Document id; mapped to the JSON property {@code _id} by Gson. */
    @SerializedName("_id")
    private String id;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package dev.langchain4j.store.embedding.vearch;

import lombok.Builder;
import lombok.Getter;
import lombok.Setter;

/**
 * Request body for creating a database; it carries only the database name.
 * <p>
 * Getters, setters and the builder are generated by Lombok.
 */
@Builder
@Getter
@Setter
class CreateDatabaseRequest {

    /** Name of the database. */
    private String name;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package dev.langchain4j.store.embedding.vearch;

import lombok.Builder;
import lombok.Getter;
import lombok.Setter;

/**
 * Response to a create-database call: the id and name of the database.
 * <p>
 * Getters, setters and the builder are generated by Lombok.
 */
@Builder
@Getter
@Setter
class CreateDatabaseResponse {

    /** Identifier of the database. */
    private Long id;

    /** Name of the database. */
    private String name;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package dev.langchain4j.store.embedding.vearch;

import dev.langchain4j.store.embedding.vearch.ModelParam;
import dev.langchain4j.store.embedding.vearch.SpaceEngine;
import dev.langchain4j.store.embedding.vearch.SpacePropertyParam;
import lombok.Builder;
import lombok.Getter;
import lombok.Setter;

import java.util.List;
import java.util.Map;

/**
 * Request body for creating a space: its name, partition and replica counts,
 * engine configuration, property definitions and model definitions.
 * <p>
 * Getters, setters and the builder are generated by Lombok.
 */
@Builder
@Getter
@Setter
class CreateSpaceRequest {

    /** Name of the space. */
    private String name;

    /** Partition count for the space. */
    private Integer partitionNum;

    /** Replica count for the space. */
    private Integer replicaNum;

    /** Engine configuration of the space. */
    private SpaceEngine engine;

    /** Property definitions, keyed by a {@code String} (presumably the property name). */
    private Map<String, SpacePropertyParam> properties;

    /** Model definitions attached to the space. */
    private List<ModelParam> models;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package dev.langchain4j.store.embedding.vearch;

import lombok.Builder;
import lombok.Getter;
import lombok.Setter;

/**
 * Response to a create-space call: the id and name of the space.
 * <p>
 * Getters, setters and the builder are generated by Lombok.
 */
@Builder
@Getter
@Setter
class CreateSpaceResponse {

    /** Identifier of the space. */
    private Integer id;

    /** Name of the space. */
    private String name;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package dev.langchain4j.store.embedding.vearch;

import lombok.Builder;
import lombok.Getter;
import lombok.Setter;

/**
 * One database entry of a list-databases response: its id and name.
 * <p>
 * Getters, setters and the builder are generated by Lombok.
 */
@Builder
@Getter
@Setter
class ListDatabaseResponse {

    /** Identifier of the database. */
    private Integer id;

    /** Name of the database. */
    private String name;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package dev.langchain4j.store.embedding.vearch;

import lombok.Builder;
import lombok.Getter;
import lombok.Setter;

/**
 * One space entry of a list-spaces response: its id and name.
 * <p>
 * Getters, setters and the builder are generated by Lombok.
 */
@Builder
@Getter
@Setter
public class ListSpaceResponse {

    /** Identifier of the space. */
    private Integer id;

    /** Name of the space. */
    private String name;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package dev.langchain4j.store.embedding.vearch;

import com.google.gson.annotations.SerializedName;

/**
 * Metric type used when searching a space.
 *
 * <p>If no metric type is set when searching, the one specified when the space
 * was built is used.</p>
 *
 * <p>LangChain4j currently only supports {@link MetricType#INNER_PRODUCT}.</p>
 */
public enum MetricType {

/**
 * Inner product; serialized by Gson as {@code "InnerProduct"}.
 */
@SerializedName("InnerProduct")
INNER_PRODUCT
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package dev.langchain4j.store.embedding.vearch;

import lombok.Builder;
import lombok.Getter;
import lombok.Setter;

import java.util.List;

/**
 * Description of a model entry: a model id, the fields it refers to and an
 * {@code out} value.
 * <p>
 * Getters, setters and the builder are generated by Lombok.
 */
@Builder
@Getter
@Setter
public class ModelParam {

    /** Identifier of the model. */
    private String modelId;

    /** Names of the fields associated with the model. */
    private List<String> fields;

    /** The {@code out} setting of the model (presumably its output target; confirm against the vearch API). */
    private String out;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package dev.langchain4j.store.embedding.vearch;

import lombok.Builder;
import lombok.Getter;
import lombok.Setter;

/**
 * Generic response envelope: a numeric code, a message and a typed payload.
 * <p>
 * Getters, setters and the builder are generated by Lombok.
 *
 * @param <T> type of the wrapped payload
 */
@Builder
@Getter
@Setter
class ResponseWrapper<T> {

    /** Numeric code of the response. */
    private Integer code;

    /** Message accompanying the code. */
    private String msg;

    /** Payload of the response. */
    private T data;
}
Loading

0 comments on commit babd64a

Please sign in to comment.