Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ version: '3.5'
services:
standalone:
container_name: milvus-javasdk-standalone-1
image: milvusdb/milvus:v2.6.1
image: milvusdb/milvus:master-20250922-200ee4cb-amd64
command: [ "milvus", "run", "standalone" ]
environment:
- COMMON_STORAGETYPE=local
Expand All @@ -24,7 +24,7 @@ services:

standaloneslave:
container_name: milvus-javasdk-standalone-2
image: milvusdb/milvus:v2.6.1
image: milvusdb/milvus:master-20250922-200ee4cb-amd64
command: [ "milvus", "run", "standalone" ]
environment:
- COMMON_STORAGETYPE=local
Expand Down
2 changes: 1 addition & 1 deletion examples/src/main/java/io/milvus/v1/CommonUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ public static SortedMap<Long, Float> generateSparseVector() {
Random ran = new Random();
SortedMap<Long, Float> sparse = new TreeMap<>();
int dim = ran.nextInt(10) + 10;
for (int i = 0; i < dim; ++i) {
while (sparse.size() < dim) {
sparse.put((long)ran.nextInt(1000000), ran.nextFloat());
}
return sparse;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,9 @@
import io.milvus.bulkwriter.writer.ParquetFileWriter;
import io.milvus.common.utils.ExceptionUtils;
import io.milvus.common.utils.Float16Utils;
import io.milvus.grpc.FieldSchema;
import io.milvus.param.ParamUtils;
import io.milvus.v2.common.DataType;
import io.milvus.v2.service.collection.request.CreateCollectionReq;
import io.milvus.v2.utils.SchemaUtils;
import io.milvus.v2.utils.DataUtils;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
Expand Down Expand Up @@ -362,8 +360,7 @@ protected Map<String, Object> verifyRow(JsonObject row) {
}

private Pair<Object, Integer> verifyVector(JsonElement object, CreateCollectionReq.FieldSchema field) {
FieldSchema grpcField = SchemaUtils.convertToGrpcFieldSchema(field);
Object vector = ParamUtils.checkFieldValue(ParamUtils.ConvertField(grpcField), object);
Object vector = DataUtils.checkFieldValue(field, object);
io.milvus.v2.common.DataType dataType = field.getDataType();
switch (dataType) {
case FloatVector:
Expand Down Expand Up @@ -396,8 +393,7 @@ private Pair<Object, Integer> verifyVarchar(JsonElement object, CreateCollection
return Pair.of(null, 0);
}

FieldSchema grpcField = SchemaUtils.convertToGrpcFieldSchema(field);
Object varchar = ParamUtils.checkFieldValue(ParamUtils.ConvertField(grpcField), object);
Object varchar = DataUtils.checkFieldValue(field, object);
return Pair.of(varchar, String.valueOf(varchar).length());
}

Expand All @@ -411,8 +407,7 @@ private Pair<Object, Integer> verifyJSON(JsonElement object, CreateCollectionReq
}

private Pair<Object, Integer> verifyArray(JsonElement object, CreateCollectionReq.FieldSchema field) {
FieldSchema grpcField = SchemaUtils.convertToGrpcFieldSchema(field);
Object array = ParamUtils.checkFieldValue(ParamUtils.ConvertField(grpcField), object);
Object array = DataUtils.checkFieldValue(field, object);
if (array == null) {
return Pair.of(null, 0);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public List<ByteBuffer> generateBFloat16Vectors(int count) {
public SortedMap<Long, Float> generateSparseVector() {
SortedMap<Long, Float> sparse = new TreeMap<>();
int dim = RANDOM.nextInt(10) + 10;
for (int i = 0; i < dim; ++i) {
while (sparse.size() < dim) {
sparse.put((long) RANDOM.nextInt(1000000), RANDOM.nextFloat());
}
return sparse;
Expand Down
100 changes: 48 additions & 52 deletions sdk-core/src/main/java/io/milvus/param/ParamUtils.java

Large diffs are not rendered by default.

183 changes: 140 additions & 43 deletions sdk-core/src/main/java/io/milvus/response/FieldDataWrapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedMap;
import java.util.*;
import java.util.stream.Collectors;

import com.google.protobuf.ByteString;
Expand Down Expand Up @@ -72,7 +70,11 @@ public int getDim() throws IllegalResponseException {
if (!isVectorField()) {
throw new IllegalResponseException("Not a vector field");
}
return (int) fieldData.getVectors().getDim();
return getDimInternal(fieldData.getVectors());
}

private int getDimInternal(VectorField vector) {
return (int) vector.getDim();
}

// this method returns bytes size of each vector according to vector type
Expand Down Expand Up @@ -106,16 +108,16 @@ private int checkDim(DataType dt, ByteString data, int dim) {
return 0;
}

private ByteString getVectorBytes(FieldData fieldData, DataType dt) {
private ByteString getVectorBytes(VectorField vd, DataType dt) {
ByteString data;
if (dt == DataType.BinaryVector) {
data = fieldData.getVectors().getBinaryVector();
data = vd.getBinaryVector();
} else if (dt == DataType.Float16Vector) {
data = fieldData.getVectors().getFloat16Vector();
data = vd.getFloat16Vector();
} else if (dt == DataType.BFloat16Vector) {
data = fieldData.getVectors().getBfloat16Vector();
data = vd.getBfloat16Vector();
} else if (dt == DataType.Int8Vector) {
data = fieldData.getVectors().getInt8Vector();
data = vd.getInt8Vector();
} else {
String msg = String.format("Unsupported data type %s returned by FieldData", dt.name());
throw new IllegalResponseException(msg);
Expand Down Expand Up @@ -148,7 +150,7 @@ public long getRowCount() throws IllegalResponseException {
case BFloat16Vector:
case Int8Vector: {
int dim = getDim();
ByteString data = getVectorBytes(fieldData, dt);
ByteString data = getVectorBytes(fieldData.getVectors(), dt);
int bytePerVec = checkDim(dt, data, dim);

return data.size()/bytePerVec;
Expand Down Expand Up @@ -176,6 +178,20 @@ public long getRowCount() throws IllegalResponseException {
return fieldData.getScalars().getJsonData().getDataCount();
case Array:
return fieldData.getScalars().getArrayData().getDataCount();
case ArrayOfStruct: {
List<FieldData> structData = fieldData.getStructArrays().getFieldsList();
for (FieldData fd : structData) {
if (fd.getType() == DataType.Array) {
return fd.getScalars().getArrayData().getDataCount();
} else if (fd.getType() == DataType.ArrayOfVector) {
FieldDataWrapper tempWrapper = new FieldDataWrapper(fd);
return tempWrapper.getRowCount();
}
}
}
case ArrayOfVector: {
return fieldData.getVectors().getVectorArray().getDataCount();
}
default:
throw new IllegalResponseException("Unsupported data type returned by FieldData");
}
Expand All @@ -194,6 +210,7 @@ public long getRowCount() throws IllegalResponseException {
* Varchar field returns List of String
* Array field returns List of List
* JSON field returns List of String;
* Struct field returns List of List<Map<String, Object>>
* etc.
*
* Throws {@link IllegalResponseException} if the field type is illegal.
Expand All @@ -211,10 +228,51 @@ public List<?> getFieldData() throws IllegalResponseException {

private List<?> getFieldDataInternal() throws IllegalResponseException {
DataType dt = fieldData.getType();
switch (dt) {
case FloatVector:
case BinaryVector:
case Float16Vector:
case BFloat16Vector:
case Int8Vector:
case SparseFloatVector:
return getVectorData(dt, fieldData.getVectors());
case Array:
case Int64:
case Int32:
case Int16:
case Int8:
case Bool:
case Float:
case Double:
case VarChar:
case String:
case JSON:
return getScalarData(dt, fieldData.getScalars(), fieldData.getValidDataList());
case ArrayOfStruct:
return getStructData(fieldData.getStructArrays(), fieldData.getFieldName());
default:
throw new IllegalResponseException("Unsupported data type returned by FieldData");
}
}

private List<?> setNoneData(List<?> data, List<Boolean> validData) {
if (validData != null && validData.size() == data.size()) {
List<?> newData = new ArrayList<>(data); // copy the list since the data is come from grpc is not mutable
for (int i = 0; i < validData.size(); i++) {
if (validData.get(i) == Boolean.FALSE) {
newData.set(i, null);
}
}
return newData;
}
return data;
}

private List<?> getVectorData(DataType dt, VectorField vector) {
switch (dt) {
case FloatVector: {
int dim = getDim();
List<Float> data = fieldData.getVectors().getFloatVector().getDataList();
int dim = getDimInternal(vector);
List<Float> data = vector.getFloatVector().getDataList();
if (data.size() % dim != 0) {
String msg = String.format("Returned float vector data array size %d doesn't match dimension %d",
data.size(), dim);
Expand All @@ -232,10 +290,10 @@ private List<?> getFieldDataInternal() throws IllegalResponseException {
case Float16Vector:
case BFloat16Vector:
case Int8Vector: {
int dim = getDim();
ByteString data = getVectorBytes(fieldData, dt);
int dim = getDimInternal(vector);
ByteString data = getVectorBytes(vector, dt);
int bytePerVec = checkDim(dt, data, dim);
int count = data.size()/bytePerVec;
int count = data.size() / bytePerVec;
List<ByteBuffer> packData = new ArrayList<>();
for (int i = 0; i < count; ++i) {
ByteBuffer bf = ByteBuffer.allocate(bytePerVec);
Expand All @@ -252,7 +310,7 @@ private List<?> getFieldDataInternal() throws IllegalResponseException {
// in Java sdk, each sparse vector is pairs of long+float
// in server side, each sparse vector is stored as uint+float (8 bytes)
// don't use sparseArray.getDim() because the dim is the max index of each rows
SparseFloatArray sparseArray = fieldData.getVectors().getSparseFloatVector();
SparseFloatArray sparseArray = vector.getSparseFloatVector();
List<SortedMap<Long, Float>> packData = new ArrayList<>();
for (int i = 0; i < sparseArray.getContentsCount(); ++i) {
ByteString bs = sparseArray.getContents(i);
Expand All @@ -262,34 +320,9 @@ private List<?> getFieldDataInternal() throws IllegalResponseException {
}
return packData;
}
case Array:
case Int64:
case Int32:
case Int16:
case Int8:
case Bool:
case Float:
case Double:
case VarChar:
case String:
case JSON:
return getScalarData(dt, fieldData.getScalars(), fieldData.getValidDataList());
default:
throw new IllegalResponseException("Unsupported data type returned by FieldData");
}
}

private List<?> setNoneData(List<?> data, List<Boolean> validData) {
if (validData != null && validData.size() == data.size()) {
List<?> newData = new ArrayList<>(data); // copy the list since the data is come from grpc is not mutable
for (int i = 0; i < validData.size(); i++) {
if (validData.get(i) == Boolean.FALSE) {
newData.set(i, null);
}
}
return newData;
return new ArrayList<>();
}
return data;
}

private List<?> getScalarData(DataType dt, ScalarField scalar, List<Boolean> validData) {
Expand All @@ -315,7 +348,7 @@ private List<?> getScalarData(DataType dt, ScalarField scalar, List<Boolean> val
return dataList.stream().map(ByteString::toStringUtf8).collect(Collectors.toList());
case Array:
List<List<?>> array = new ArrayList<>();
ArrayArray arrArray = fieldData.getScalars().getArrayData();
ArrayArray arrArray = scalar.getArrayData();
boolean nullable = validData != null && validData.size() == arrArray.getDataCount();
for (int i = 0; i < arrArray.getDataCount(); i++) {
if (nullable && validData.get(i) == Boolean.FALSE) {
Expand All @@ -331,6 +364,70 @@ private List<?> getScalarData(DataType dt, ScalarField scalar, List<Boolean> val
}
}

private List<?> getStructData(StructArrayField field, String fieldName) {
List<List<Map<String, Object>>> packData = new ArrayList<>();
if (field.getFieldsCount() == 0) {
return packData;
}

// read column data from FieldData
// for a struct with two sub-fields "int" and "emb", search with nq=2, topk=3
// the column data is like this:
// {
// "int": [[x1, x2], [x1, x2, x3], [x1], [x1, x2], [x1, x2, x3], [x1]],
// "emb": [[emb1, emb2], [emb1, emb2, emb3], [emb1], [emb1m emb2], [emb1, emb2, emb3], [emb1]],
// }
Map<String, List<List<?>>> columnsData = new HashMap<>();
int rowCount = 0;
for (FieldData fd : field.getFieldsList()) {
List<List<?>> column = new ArrayList<>();
if (fd.getType() == DataType.Array) {
column = (List<List<?>>) getScalarData(fd.getType(), fd.getScalars(), fd.getValidDataList());
columnsData.put(fd.getFieldName(), column);
rowCount = column.size();
} else if (fd.getType() == DataType.ArrayOfVector) {
VectorArray vecArr = fd.getVectors().getVectorArray();
for (VectorField vf : vecArr.getDataList()) {
List<?> vector = getVectorData(vecArr.getElementType(), vf);
column.add(vector);
}
rowCount = column.size();
columnsData.put(fd.getFieldName(), column);
} else {
throw new IllegalResponseException("Unsupported data type returned by StructArrayField");
}
}

// convert column data into struct list, eventually, the packData is like this:
// [
// [{x1, emb1}, {x2, emb2}],
// [{x1, emb1}, {x2, emb2}, {x3, emb3}],
// [{x1, emb1}],
// [{x1, emb1}, {x2, emb2}],
// [{x1, emb1}, {x2, emb2}, {x3, emb3}],
// [{x1, emb1}]
// ]
for (int i = 0; i < rowCount; i++) {
int elementCount = 0;
Map<String, List<?>> rowColumn = new HashMap<>();
for (String key : columnsData.keySet()) {
List<?> val = columnsData.get(key).get(i);
rowColumn.put(key, val);
elementCount = val.size();
}

List<Map<String, Object>> structs = new ArrayList<>();
for (int k = 0; k < elementCount; k++) {
Map<String, Object> struct = new HashMap<>();
int finalK = k;
rowColumn.forEach((key, val)->struct.put(key, val.get(finalK)));
structs.add(struct);
}
packData.add(structs);
}
return packData;
}

public Integer getAsInt(int index, String paramName) throws IllegalResponseException {
if (isJsonField()) {
String result = getAsString(index, paramName);
Expand Down
4 changes: 3 additions & 1 deletion sdk-core/src/main/java/io/milvus/v2/common/DataType.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ public enum DataType {
Float16Vector(102),
BFloat16Vector(103),
SparseFloatVector(104),
Int8Vector(105);
Int8Vector(105),

Struct(201);

private final int code;
DataType(int code) {
Expand Down
8 changes: 7 additions & 1 deletion sdk-core/src/main/java/io/milvus/v2/common/IndexParam.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ public enum MetricType {

// Only for sparse vector with BM25
BM25,

// Only for struct vector
MAX_SIM,
;
}

Expand Down Expand Up @@ -94,7 +97,10 @@ public enum IndexType {
SPARSE_INVERTED_INDEX(300),
// From Milvus 2.5.4 onward, SPARSE_WAND is being deprecated. Instead, it is recommended to
// use "inverted_index_algo": "DAAT_WAND" for equivalency while maintaining compatibility.
SPARSE_WAND(301)
SPARSE_WAND(301),

// Only for struct vector
EMB_LIST_HNSW(401),
;

private final String name;
Expand Down
Loading
Loading