Skip to content

Commit 70e14f4

Browse files
authored
Fix a critical bug that partial upsert override field value to null (#1637)
Signed-off-by: yhmo <yihua.mo@zilliz.com>
1 parent 07143ec commit 70e14f4

File tree

3 files changed

+153
-66
lines changed

3 files changed

+153
-66
lines changed

examples/src/main/java/io/milvus/v2/UpsertExample.java

Lines changed: 78 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ public class UpsertExample {
5151
private static final String ID_FIELD = "pk";
5252
private static final String VECTOR_FIELD = "vector";
5353
private static final String TEXT_FIELD = "text";
54-
private static final Integer VECTOR_DIM = 128;
54+
private static final String NULLABLE_FIELD = "nullable";
55+
private static final Integer VECTOR_DIM = 4;
5556

5657
private static List<Object> createCollection(boolean autoID) {
5758
// Drop collection if exists
@@ -78,6 +79,11 @@ private static List<Object> createCollection(boolean autoID) {
7879
.dataType(DataType.VarChar)
7980
.maxLength(100)
8081
.build());
82+
collectionSchema.addField(AddFieldReq.builder()
83+
.fieldName(NULLABLE_FIELD)
84+
.dataType(DataType.Int32)
85+
.isNullable(true)
86+
.build());
8187

8288
List<IndexParam> indexes = new ArrayList<>();
8389
indexes.add(IndexParam.builder()
@@ -106,6 +112,7 @@ private static List<Object> createCollection(boolean autoID) {
106112
List<Float> vector = CommonUtils.generateFloatVector(VECTOR_DIM);
107113
row.add(VECTOR_FIELD, gson.toJsonTree(vector));
108114
row.addProperty(TEXT_FIELD, String.format("text_%d", i));
115+
row.addProperty(NULLABLE_FIELD, i);
109116
rows.add(row);
110117
}
111118
InsertResp resp = client.insert(InsertReq.builder()
@@ -119,7 +126,7 @@ private static void queryWithExpr(String expr) {
119126
QueryResp queryRet = client.query(QueryReq.builder()
120127
.collectionName(COLLECTION_NAME)
121128
.filter(expr)
122-
.outputFields(Arrays.asList(ID_FIELD, TEXT_FIELD))
129+
.outputFields(Arrays.asList(ID_FIELD, VECTOR_FIELD, TEXT_FIELD, NULLABLE_FIELD))
123130
.consistencyLevel(ConsistencyLevel.STRONG)
124131
.build());
125132
System.out.println("\nQuery with expression: " + expr);
@@ -130,34 +137,79 @@ private static void queryWithExpr(String expr) {
130137
}
131138

132139
private static void doUpsert(boolean autoID) {
133-
// if autoID is true, the collection primary key is auto-generated by server
140+
System.out.printf("\n============================= autoID = %s =============================", autoID ? "true" : "false");
141+
// If autoID is true, the collection primary key is auto-generated by server
134142
List<Object> ids = createCollection(autoID);
135143

136-
// query before upsert
137-
Long testID = (Long)ids.get(1);
138-
String filter = String.format("%s == %d", ID_FIELD, testID);
139-
queryWithExpr(filter);
140-
141-
// upsert
142-
// the server will return a new primary key, the old entity is deleted,
143-
// and a new entity is created with the new primary key
144144
Gson gson = new Gson();
145-
JsonObject row = new JsonObject();
146-
row.addProperty(ID_FIELD, testID);
147-
List<Float> vector = CommonUtils.generateFloatVector(VECTOR_DIM);
148-
row.add(VECTOR_FIELD, gson.toJsonTree(vector));
149-
row.addProperty(TEXT_FIELD, "this field has been updated");
150-
UpsertResp upsertResp = client.upsert(UpsertReq.builder()
151-
.collectionName(COLLECTION_NAME)
152-
.data(Collections.singletonList(row))
153-
.build());
154-
List<Object> newIds = upsertResp.getPrimaryKeys();
155-
Long newID = (Long)newIds.get(0);
156-
System.out.println("\nUpsert done");
145+
{
146+
// Query before upsert, get the No.2 primary key
147+
Long oldID = (Long) ids.get(1);
148+
String filter = String.format("%s == %d", ID_FIELD, oldID);
149+
queryWithExpr(filter);
150+
151+
// Upsert, update all fields value
152+
// If autoID is true, the server will return a new primary key for the updated entity
153+
JsonObject row = new JsonObject();
154+
row.addProperty(ID_FIELD, oldID);
155+
List<Float> vector = Arrays.asList(1.0f, 1.0f, 1.0f, 1.0f);
156+
row.add(VECTOR_FIELD, gson.toJsonTree(vector));
157+
row.addProperty(TEXT_FIELD, "this field has been updated");
158+
row.add(NULLABLE_FIELD, null); // update nullable field to null
159+
UpsertResp upsertResp = client.upsert(UpsertReq.builder()
160+
.collectionName(COLLECTION_NAME)
161+
.data(Collections.singletonList(row))
162+
.build());
163+
List<Object> newIds = upsertResp.getPrimaryKeys();
164+
Long newID = (Long) newIds.get(0);
165+
System.out.printf("\nUpsert done, primary key %d has been updated to %d%n", oldID, newID);
166+
167+
// Query after upsert, you will see the vector field is [1.0f, 1.0f, 1.0f, 1.0f],
168+
// text field is "this field has been updated", nullable field is null
169+
filter = String.format("%s == %d", ID_FIELD, newID);
170+
queryWithExpr(filter);
171+
}
157172

158-
// query after upsert
159-
filter = String.format("%s == %d", ID_FIELD, newID);
160-
queryWithExpr(filter);
173+
{
174+
// Query before upsert, get the No.5 and No.6 primary key
175+
Long oldID1 = (Long)ids.get(4);
176+
Long oldID2 = (Long)ids.get(5);
177+
String filter = String.format("%s in [%d, %d]", ID_FIELD, oldID1, oldID2);
178+
queryWithExpr(filter);
179+
180+
// Partial upsert, only update the specified field, other fields will keep old values
181+
// If autoID is true, the server will return a new primary key for the updated entity
182+
// Note: for the case to do partial upsert for multi entities, it only allows updating
183+
// the same fields for all rows.
184+
// For example, assume a collection has 2 fields: A and B
185+
// it is legal to update the same fields as: client.upsert(data = [ {"A": 1}, {"A": 3}])
186+
// it is illegal to update different fields as: client.upsert(data = [ {"A": 1}, {"B": 3}])
187+
// Read the doc for more info: https://milvus.io/docs/upsert-entities.md
188+
// Here we update the same field "text" for the two rows.
189+
JsonObject row1 = new JsonObject();
190+
row1.addProperty(ID_FIELD, oldID1);
191+
row1.addProperty(TEXT_FIELD, "this row has been partially updated");
192+
193+
JsonObject row2 = new JsonObject();
194+
row2.addProperty(ID_FIELD, oldID2);
195+
row2.addProperty(TEXT_FIELD, "this row has been partially updated");
196+
197+
UpsertResp upsertResp = client.upsert(UpsertReq.builder()
198+
.collectionName(COLLECTION_NAME)
199+
.data(Arrays.asList(row1, row2))
200+
.partialUpdate(true)
201+
.build());
202+
List<Object> newIds = upsertResp.getPrimaryKeys();
203+
Long newID1 = (Long) newIds.get(0);
204+
Long newID2 = (Long) newIds.get(1);
205+
System.out.printf("\nPartial upsert done, primary key %d has been updated to %d, %d has been updated to %d%n",
206+
oldID1, newID1, oldID2, newID2);
207+
208+
// query after upsert, you will see the text field is "this row has been partially updated"
209+
// the other fields keep old values
210+
filter = String.format("%s in [%d, %d]", ID_FIELD, newID1, newID2);
211+
queryWithExpr(filter);
212+
}
161213
}
162214

163215
public static void main(String[] args) {

sdk-core/src/main/java/io/milvus/v2/utils/DataUtils.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,9 +250,13 @@ private void processNormalFieldValues(JsonObject row, CreateCollectionReq.FieldS
250250
return;
251251
}
252252

253-
// if the field doesn't have default value, require user provide the value
254253
// in v2.6.1 support partial update, user can input partial fields
255-
if (!field.getIsNullable() && field.getDefaultValue() == null && !partialUpdate) {
254+
if (partialUpdate) {
255+
return;
256+
}
257+
258+
// if the field doesn't have default value, require user provide the value
259+
if (!field.getIsNullable() && field.getDefaultValue() == null) {
256260
String msg = String.format("The field: %s is not provided.", fieldName);
257261
throw new MilvusClientException(ErrorCode.INVALID_PARAMS, msg);
258262
}

sdk-core/src/test/java/io/milvus/v2/client/MilvusClientV2DockerTest.java

Lines changed: 69 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1636,6 +1636,11 @@ void testDeleteUpsert() {
16361636
.dataType(DataType.FloatVector)
16371637
.dimension(4)
16381638
.build());
1639+
collectionSchema.addField(AddFieldReq.builder()
1640+
.fieldName("text")
1641+
.dataType(DataType.VarChar)
1642+
.maxLength(1024)
1643+
.build());
16391644

16401645
List<IndexParam> indexParams = new ArrayList<>();
16411646
indexParams.add(IndexParam.builder()
@@ -1657,7 +1662,8 @@ void testDeleteUpsert() {
16571662
List<JsonObject> data = new ArrayList<>();
16581663
for (int i = 0; i < 10; i++) {
16591664
JsonObject row = new JsonObject();
1660-
row.addProperty("pk", String.format("pk_%d", i));
1665+
row.addProperty("pk", "pk_" + i);
1666+
row.addProperty("text", "desc_" + i);
16611667
row.add("float_vector", JsonUtils.toJsonTree(new float[]{(float)i, (float)(i + 1), (float)(i + 2), (float)(i + 3)}));
16621668
data.add(row);
16631669
}
@@ -1682,57 +1688,82 @@ void testDeleteUpsert() {
16821688
Assertions.assertEquals(8L, rowCount);
16831689

16841690
// upsert
1685-
List<JsonObject> dataUpdate = new ArrayList<>();
1686-
JsonObject row1 = new JsonObject();
1687-
row1.addProperty("pk", "pk_5");
1688-
row1.add("float_vector", JsonUtils.toJsonTree(new float[]{5.0f, 5.0f, 5.0f, 5.0f}));
1689-
dataUpdate.add(row1);
1690-
JsonObject row2 = new JsonObject();
1691-
row2.addProperty("pk", "pk_2");
1692-
row2.add("float_vector", JsonUtils.toJsonTree(new float[]{2.0f, 2.0f, 2.0f, 2.0f}));
1693-
dataUpdate.add(row2);
1694-
UpsertResp upsertResp = client.upsert(UpsertReq.builder()
1695-
.databaseName(testDbName)
1696-
.collectionName(randomCollectionName)
1697-
.data(dataUpdate)
1698-
.build());
1699-
Assertions.assertEquals(2, upsertResp.getUpsertCnt());
1700-
Assertions.assertEquals(2, upsertResp.getPrimaryKeys().size());
1691+
// id=5 and id=8 has been deleted, need to provide all fields
1692+
{
1693+
JsonObject row1 = new JsonObject();
1694+
row1.addProperty("pk", "pk_5");
1695+
row1.addProperty("text", "updated_5");
1696+
row1.add("float_vector", JsonUtils.toJsonTree(new float[]{5.0f, 5.0f, 5.0f, 5.0f}));
1697+
1698+
JsonObject row2 = new JsonObject();
1699+
row2.addProperty("pk", "pk_8");
1700+
row2.addProperty("text", "updated_8");
1701+
row2.add("float_vector", JsonUtils.toJsonTree(new float[]{5.0f, 5.0f, 5.0f, 5.0f}));
1702+
1703+
UpsertResp upsertResp = client.upsert(UpsertReq.builder()
1704+
.databaseName(testDbName)
1705+
.collectionName(randomCollectionName)
1706+
.data(Arrays.asList(row1, row2))
1707+
.build());
1708+
Assertions.assertEquals(2, upsertResp.getUpsertCnt());
1709+
Assertions.assertEquals(2, upsertResp.getPrimaryKeys().size());
1710+
}
1711+
// id=2 is a partial update, "text" old value is reused
1712+
{
1713+
JsonObject row = new JsonObject();
1714+
row.addProperty("pk", "pk_2");
1715+
row.add("float_vector", JsonUtils.toJsonTree(new float[]{5.0f, 5.0f, 5.0f, 5.0f}));
1716+
1717+
UpsertResp upsertResp = client.upsert(UpsertReq.builder()
1718+
.databaseName(testDbName)
1719+
.collectionName(randomCollectionName)
1720+
.data(Collections.singletonList(row))
1721+
.partialUpdate(true)
1722+
.build());
1723+
Assertions.assertEquals(1, upsertResp.getUpsertCnt());
1724+
Assertions.assertEquals(1, upsertResp.getPrimaryKeys().size());
1725+
}
17011726

17021727
// get row count
17031728
rowCount = getRowCount(testDbName, randomCollectionName);
1704-
Assertions.assertEquals(9L, rowCount);
1729+
Assertions.assertEquals(10L, rowCount);
17051730

17061731
// verify
17071732
QueryResp queryResp = client.query(QueryReq.builder()
17081733
.databaseName(testDbName)
17091734
.collectionName(randomCollectionName)
1710-
.ids(Arrays.asList("pk_2", "pk_5"))
1735+
.ids(Arrays.asList("pk_2", "pk_5", "pk_8"))
17111736
.outputFields(Collections.singletonList("*"))
17121737
.build());
17131738
List<QueryResp.QueryResult> queryResults = queryResp.getQueryResults();
1714-
Assertions.assertEquals(2, queryResults.size());
1739+
Assertions.assertEquals(3, queryResults.size());
17151740

1716-
QueryResp.QueryResult result1 = queryResults.get(0);
1717-
Map<String, Object> entity1 = result1.getEntity();
1718-
Assertions.assertTrue(entity1.containsKey("pk"));
1719-
Assertions.assertEquals("pk_2", entity1.get("pk"));
1720-
Assertions.assertTrue(entity1.containsKey("float_vector"));
1721-
Assertions.assertTrue(entity1.get("float_vector") instanceof List);
1722-
List<Float> vector1 = (List<Float>) entity1.get("float_vector");
1723-
for (Float f : vector1) {
1724-
Assertions.assertEquals(2.0f, f);
1741+
{
1742+
QueryResp.QueryResult result = queryResults.get(0);
1743+
Map<String, Object> entity = result.getEntity();
1744+
Assertions.assertTrue(entity.containsKey("pk"));
1745+
Assertions.assertEquals("pk_2", entity.get("pk"));
1746+
Assertions.assertEquals("desc_2", entity.get("text"));
1747+
Assertions.assertTrue(entity.containsKey("float_vector"));
1748+
Assertions.assertTrue(entity.get("float_vector") instanceof List);
1749+
List<Float> vector1 = (List<Float>) entity.get("float_vector");
1750+
for (Float f : vector1) {
1751+
Assertions.assertEquals(5.0f, f);
1752+
}
17251753
}
17261754

1727-
QueryResp.QueryResult result2 = queryResults.get(1);
1728-
Map<String, Object> entity2 = result2.getEntity();
1729-
Assertions.assertTrue(entity2.containsKey("pk"));
1730-
Assertions.assertEquals("pk_5", entity2.get("pk"));
1731-
Assertions.assertTrue(entity2.containsKey("float_vector"));
1732-
Assertions.assertTrue(entity2.get("float_vector") instanceof List);
1733-
List<Float> vector2 = (List<Float>) entity2.get("float_vector");
1734-
for (Float f : vector2) {
1735-
Assertions.assertEquals(5.0f, f);
1755+
{
1756+
QueryResp.QueryResult result = queryResults.get(1);
1757+
Map<String, Object> entity = result.getEntity();
1758+
Assertions.assertTrue(entity.containsKey("pk"));
1759+
Assertions.assertEquals("pk_5", entity.get("pk"));
1760+
Assertions.assertEquals("updated_5", entity.get("text"));
1761+
Assertions.assertTrue(entity.containsKey("float_vector"));
1762+
Assertions.assertTrue(entity.get("float_vector") instanceof List);
1763+
List<Float> vector2 = (List<Float>) entity.get("float_vector");
1764+
for (Float f : vector2) {
1765+
Assertions.assertEquals(5.0f, f);
1766+
}
17361767
}
17371768

17381769
client.dropCollection(DropCollectionReq.builder()

0 commit comments

Comments
 (0)