diff --git a/docs/docs/integrations/embedding-stores/pinecone.md b/docs/docs/integrations/embedding-stores/pinecone.md index 329c424c0bf..75ab0c5c537 100644 --- a/docs/docs/integrations/embedding-stores/pinecone.md +++ b/docs/docs/integrations/embedding-stores/pinecone.md @@ -17,6 +17,14 @@ https://www.pinecone.io/ ``` +## Known Issues + +- https://github.com/langchain4j/langchain4j/issues/1948 +Pinecone stores all numbers as [floating-point values](https://docs.pinecone.io/guides/data/filter-with-metadata#supported-metadata-types), +which means `Integer` and `Long` values (e.g., 1746714878034235396) stored in `Metadata` +may be corrupted and returned as incorrect numbers! +Possible workaround: convert integer/long values to `String` before storing them in `Metadata`. +Please note that in this case metadata filtering might not work properly! ## APIs diff --git a/langchain4j-chroma/src/main/java/dev/langchain4j/store/embedding/chroma/ChromaClient.java b/langchain4j-chroma/src/main/java/dev/langchain4j/store/embedding/chroma/ChromaClient.java index 346a709f7a0..d3e84db826a 100644 --- a/langchain4j-chroma/src/main/java/dev/langchain4j/store/embedding/chroma/ChromaClient.java +++ b/langchain4j-chroma/src/main/java/dev/langchain4j/store/embedding/chroma/ChromaClient.java @@ -1,6 +1,7 @@ package dev.langchain4j.store.embedding.chroma; import static com.google.gson.FieldNamingPolicy.LOWER_CASE_WITH_UNDERSCORES; +import static com.google.gson.ToNumberPolicy.LONG_OR_DOUBLE; import com.google.gson.Gson; import com.google.gson.GsonBuilder; @@ -32,7 +33,10 @@ private ChromaClient(Builder builder) { httpClientBuilder.addInterceptor(new ChromaResponseLoggingInterceptor()); } - Gson gson = new GsonBuilder().setFieldNamingPolicy(LOWER_CASE_WITH_UNDERSCORES).create(); + Gson gson = new GsonBuilder() + .setFieldNamingPolicy(LOWER_CASE_WITH_UNDERSCORES) + .setObjectToNumberStrategy(LONG_OR_DOUBLE) + .create(); Retrofit retrofit = new Retrofit.Builder() 
.baseUrl(Utils.ensureTrailingForwardSlash(builder.baseUrl)) diff --git a/langchain4j-core/src/test/java/dev/langchain4j/store/embedding/EmbeddingStoreIT.java b/langchain4j-core/src/test/java/dev/langchain4j/store/embedding/EmbeddingStoreIT.java index 8b3f2b65fd8..575a3a090eb 100644 --- a/langchain4j-core/src/test/java/dev/langchain4j/store/embedding/EmbeddingStoreIT.java +++ b/langchain4j-core/src/test/java/dev/langchain4j/store/embedding/EmbeddingStoreIT.java @@ -63,6 +63,9 @@ void should_add_embedding_with_segment_with_metadata() { assertThat(match.embedded().metadata().getLong("long_minus_1")).isEqualTo(-1L); assertThat(match.embedded().metadata().getLong("long_0")).isEqualTo(0L); assertThat(match.embedded().metadata().getLong("long_1")).isEqualTo(1L); + if (testLong1746714878034235396()) { + assertThat(match.embedded().metadata().getLong("long_1746714878034235396")).isEqualTo(1746714878034235396L); + } assertThat(match.embedded().metadata().getLong("long_max")).isEqualTo(Long.MAX_VALUE); assertThat(match.embedded().metadata().getFloat("float_min")).isEqualTo(-Float.MAX_VALUE); @@ -84,6 +87,10 @@ void should_add_embedding_with_segment_with_metadata() { .build()).matches()).isEqualTo(relevant); } + protected boolean testLong1746714878034235396() { + return true; + } + protected Metadata createMetadata() { Metadata metadata = new Metadata(); @@ -104,6 +111,9 @@ protected Metadata createMetadata() { metadata.put("long_minus_1", -1L); metadata.put("long_0", 0L); metadata.put("long_1", 1L); + if (testLong1746714878034235396()) { + metadata.put("long_1746714878034235396", 1746714878034235396L); + } metadata.put("long_max", Long.MAX_VALUE); metadata.put("float_min", -Float.MAX_VALUE); diff --git a/langchain4j-milvus/src/main/java/dev/langchain4j/store/embedding/milvus/Mapper.java b/langchain4j-milvus/src/main/java/dev/langchain4j/store/embedding/milvus/Mapper.java index d1215d68f6b..791608700f5 100644 --- 
a/langchain4j-milvus/src/main/java/dev/langchain4j/store/embedding/milvus/Mapper.java +++ b/langchain4j-milvus/src/main/java/dev/langchain4j/store/embedding/milvus/Mapper.java @@ -1,6 +1,7 @@ package dev.langchain4j.store.embedding.milvus; import com.google.gson.Gson; +import com.google.gson.GsonBuilder; import com.google.gson.JsonObject; import com.google.gson.reflect.TypeToken; import dev.langchain4j.data.document.Metadata; @@ -22,6 +23,7 @@ import java.util.List; import java.util.Map; +import static com.google.gson.ToNumberPolicy.LONG_OR_DOUBLE; import static dev.langchain4j.internal.Utils.isNullOrBlank; import static dev.langchain4j.internal.Utils.isNullOrEmpty; import static dev.langchain4j.store.embedding.milvus.CollectionOperationsExecutor.queryForVectors; @@ -31,7 +33,10 @@ class Mapper { - private static final Gson GSON = new Gson(); + private static final Gson GSON = new GsonBuilder() + .setObjectToNumberStrategy(LONG_OR_DOUBLE) + .create(); + private static final Type MAP_TYPE = new TypeToken>() { }.getType(); diff --git a/langchain4j-pgvector/src/test/java/dev/langchain4j/store/embedding/pgvector/PgVectorEmbeddingStoreWithColumnsFilteringIT.java b/langchain4j-pgvector/src/test/java/dev/langchain4j/store/embedding/pgvector/PgVectorEmbeddingStoreWithColumnsFilteringIT.java index 79cc12d6027..181d9d13a5c 100644 --- a/langchain4j-pgvector/src/test/java/dev/langchain4j/store/embedding/pgvector/PgVectorEmbeddingStoreWithColumnsFilteringIT.java +++ b/langchain4j-pgvector/src/test/java/dev/langchain4j/store/embedding/pgvector/PgVectorEmbeddingStoreWithColumnsFilteringIT.java @@ -16,7 +16,7 @@ static void beforeAll() { Arrays.asList("key text NULL", "name text NULL", "age float NULL", "city varchar null", "country varchar null", "string_empty varchar null", "string_space varchar null", "string_abc varchar null", "uuid uuid null", "integer_min int null", "integer_minus_1 int null", "integer_0 int null", "integer_1 int null", "integer_max int null", - "long_min 
bigint null", "long_minus_1 bigint null", "long_0 bigint null", "long_1 bigint null", "long_max bigint null", + "long_min bigint null", "long_minus_1 bigint null", "long_0 bigint null", "long_1 bigint null", "long_1746714878034235396 bigint null", "long_max bigint null", "float_min float null", "float_minus_1 float null", "float_0 float null", "float_1 float null", "float_123 float null", "float_max float null", "double_minus_1 float8 null", "double_0 float8 null", "double_1 float8 null", "double_123 float8 null" )) diff --git a/langchain4j-pinecone/src/main/java/dev/langchain4j/store/embedding/pinecone/PineconeEmbeddingStore.java b/langchain4j-pinecone/src/main/java/dev/langchain4j/store/embedding/pinecone/PineconeEmbeddingStore.java index 049079e79fd..e4d5b862e81 100644 --- a/langchain4j-pinecone/src/main/java/dev/langchain4j/store/embedding/pinecone/PineconeEmbeddingStore.java +++ b/langchain4j-pinecone/src/main/java/dev/langchain4j/store/embedding/pinecone/PineconeEmbeddingStore.java @@ -5,7 +5,12 @@ import dev.langchain4j.data.document.Metadata; import dev.langchain4j.data.embedding.Embedding; import dev.langchain4j.data.segment.TextSegment; -import dev.langchain4j.store.embedding.*; +import dev.langchain4j.store.embedding.CosineSimilarity; +import dev.langchain4j.store.embedding.EmbeddingMatch; +import dev.langchain4j.store.embedding.EmbeddingSearchRequest; +import dev.langchain4j.store.embedding.EmbeddingSearchResult; +import dev.langchain4j.store.embedding.EmbeddingStore; +import dev.langchain4j.store.embedding.RelevanceScore; import io.pinecone.clients.Index; import io.pinecone.clients.Pinecone; import io.pinecone.unsigned_indices_model.QueryResponseWithUnsignedIndices; @@ -14,7 +19,12 @@ import org.openapitools.client.model.IndexList; import org.openapitools.client.model.IndexModel; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import 
java.util.Objects; import static dev.langchain4j.internal.Utils.isNullOrEmpty; import static dev.langchain4j.internal.Utils.randomUUID; @@ -22,14 +32,21 @@ import static dev.langchain4j.store.embedding.pinecone.PineconeHelper.metadataToStruct; import static dev.langchain4j.store.embedding.pinecone.PineconeHelper.structToMetadata; import static io.pinecone.commons.IndexInterface.buildUpsertVectorWithUnsignedIndices; -import static java.util.Collections.emptyList; import static java.util.Collections.singletonList; import static java.util.Comparator.comparingDouble; import static java.util.stream.Collectors.toList; /** * Represents a Pinecone index as an embedding store. - *

Current implementation assumes the index uses the cosine distance metric.

+ *

+ * Current implementation assumes the index uses the cosine distance metric. + *

+ * WARNING! There is a known bug: + * Pinecone stores all numbers as floating-point values, + * which means {@link Integer} and {@link Long} values (e.g., 1746714878034235396) stored in {@link Metadata} + * may be corrupted and returned as incorrect numbers! + * Possible workaround: convert integer/long values to {@link String} before storing them in {@link Metadata}. + * Please note that in this case metadata filtering might not work properly! */ public class PineconeEmbeddingStore implements EmbeddingStore { diff --git a/langchain4j-pinecone/src/test/java/dev/langchain4j/store/embedding/pinecone/PineconeEmbeddingStoreIT.java b/langchain4j-pinecone/src/test/java/dev/langchain4j/store/embedding/pinecone/PineconeEmbeddingStoreIT.java index e3135092d85..68485767a3a 100644 --- a/langchain4j-pinecone/src/test/java/dev/langchain4j/store/embedding/pinecone/PineconeEmbeddingStoreIT.java +++ b/langchain4j-pinecone/src/test/java/dev/langchain4j/store/embedding/pinecone/PineconeEmbeddingStoreIT.java @@ -80,4 +80,9 @@ protected static Stream should_filter_by_metadata() { } ); } + + @Override + protected boolean testLong1746714878034235396() { + return false; // TODO remove after https://github.com/langchain4j/langchain4j/issues/1948 is fixed + } } \ No newline at end of file diff --git a/langchain4j-weaviate/src/test/java/dev/langchain4j/store/embedding/weaviate/LocalWeaviateEmbeddingStoreIT.java b/langchain4j-weaviate/src/test/java/dev/langchain4j/store/embedding/weaviate/LocalWeaviateEmbeddingStoreIT.java index 6e7b47db0b3..4518e34eea8 100644 --- a/langchain4j-weaviate/src/test/java/dev/langchain4j/store/embedding/weaviate/LocalWeaviateEmbeddingStoreIT.java +++ b/langchain4j-weaviate/src/test/java/dev/langchain4j/store/embedding/weaviate/LocalWeaviateEmbeddingStoreIT.java @@ -37,6 +37,7 @@ class LocalWeaviateEmbeddingStoreIT extends EmbeddingStoreIT { "long_minus_1", "long_0", "long_1", + "long_1746714878034235396", "long_max", "float_min", "float_minus_1",