[Bug]: [benchmark][cluster] Building Trie index on VARCHAR field stuck: failed to create index, C Runtime Exception: Assert \"offset < get_num_rows()\" => field data subscript out of range
#35550
Closed
Description
Is there an existing issue for this?
- I have searched the existing issues
Environment
- Milvus version: master-20240816-2736a8b8-amd64
- Deployment mode(standalone or cluster): cluster
- MQ type(rocksmq, pulsar or kafka): pulsar
- SDK version(e.g. pymilvus v2.0.0rc2): 2.4.5rc7
- OS(Ubuntu or CentOS):
- CPU/Memory:
- GPU:
- Others:
Current Behavior
argo task: fouramf-concurrent-qvs7s
test case name: test_bitmap_locust_shard1_dql_cluster
server:
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
bitmap-dql-etcd-0 1/1 Running 0 6m50s 10.104.16.164 4am-node21 <none> <none>
bitmap-dql-etcd-1 1/1 Running 0 6m50s 10.104.20.72 4am-node22 <none> <none>
bitmap-dql-etcd-2 1/1 Running 0 6m50s 10.104.24.98 4am-node29 <none> <none>
bitmap-dql-milvus-datanode-dcbb49bfc-gxmz7 1/1 Running 3 (2m8s ago) 6m50s 10.104.17.18 4am-node23 <none> <none>
bitmap-dql-milvus-indexnode-7dc59498c6-46tmp 1/1 Running 2 (6m10s ago) 6m50s 10.104.1.169 4am-node10 <none> <none>
bitmap-dql-milvus-indexnode-7dc59498c6-7fhzq 1/1 Running 2 (6m15s ago) 6m50s 10.104.18.192 4am-node25 <none> <none>
bitmap-dql-milvus-indexnode-7dc59498c6-99gzj 1/1 Running 2 (6m12s ago) 6m50s 10.104.32.216 4am-node39 <none> <none>
bitmap-dql-milvus-indexnode-7dc59498c6-r8fkq 1/1 Running 2 (6m14s ago) 6m50s 10.104.23.92 4am-node27 <none> <none>
bitmap-dql-milvus-mixcoord-5485d45bd5-pgslc 1/1 Running 3 (2m8s ago) 6m50s 10.104.17.17 4am-node23 <none> <none>
bitmap-dql-milvus-proxy-698c4c5497-bnzwb 1/1 Running 3 (2m5s ago) 6m50s 10.104.23.93 4am-node27 <none> <none>
bitmap-dql-milvus-querynode-5f4bffbfd8-8fnx8 1/1 Running 2 (6m14s ago) 6m50s 10.104.23.94 4am-node27 <none> <none>
bitmap-dql-milvus-querynode-5f4bffbfd8-jngd4 1/1 Running 2 (6m13s ago) 6m50s 10.104.17.19 4am-node23 <none> <none>
bitmap-dql-minio-0 1/1 Running 0 6m50s 10.104.30.130 4am-node38 <none> <none>
bitmap-dql-minio-1 1/1 Running 0 6m50s 10.104.20.69 4am-node22 <none> <none>
bitmap-dql-minio-2 1/1 Running 0 6m50s 10.104.16.165 4am-node21 <none> <none>
bitmap-dql-minio-3 1/1 Running 0 6m50s 10.104.18.205 4am-node25 <none> <none>
bitmap-dql-pulsar-bookie-0 1/1 Running 0 6m50s 10.104.18.202 4am-node25 <none> <none>
bitmap-dql-pulsar-bookie-1 1/1 Running 0 6m50s 10.104.16.167 4am-node21 <none> <none>
bitmap-dql-pulsar-bookie-2 1/1 Running 0 6m49s 10.104.20.73 4am-node22 <none> <none>
bitmap-dql-pulsar-bookie-init-bwsn2 0/1 Completed 0 6m50s 10.104.16.152 4am-node21 <none> <none>
bitmap-dql-pulsar-broker-0 1/1 Running 0 6m50s 10.104.18.189 4am-node25 <none> <none>
bitmap-dql-pulsar-proxy-0 1/1 Running 0 6m50s 10.104.32.215 4am-node39 <none> <none>
bitmap-dql-pulsar-pulsar-init-tmdxz 0/1 Completed 0 6m50s 10.104.20.62 4am-node22 <none> <none>
bitmap-dql-pulsar-recovery-0 1/1 Running 0 6m50s 10.104.16.153 4am-node21 <none> <none>
bitmap-dql-pulsar-zookeeper-0 1/1 Running 0 6m50s 10.104.20.68 4am-node22 <none> <none>
bitmap-dql-pulsar-zookeeper-1 1/1 Running 0 5m54s 10.104.17.28 4am-node23 <none> <none>
bitmap-dql-pulsar-zookeeper-2 1/1 Running 0 5m1s 10.104.33.44 4am-node36 <none> <none>
client pod name: fouramf-concurrent-qvs7s-473107777
client log:
[2024-08-16 11:20:14,830 - INFO - fouram]: [Base] Collection schema:
{'auto_id': False,
'description': '',
'fields': [{'name': 'id', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'float_vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 128}},
{'name': 'sparse_float_vector', 'description': '', 'type': <DataType.SPARSE_FLOAT_VECTOR: 104>}, {'name': 'int8_1', 'description': '', 'type': <DataType.INT8: 2>},
{'name': 'int16_1', 'description': '', 'type': <DataType.INT16: 3>}, {'name': 'int32_1', 'description': '', 'type': <DataType.INT32: 4>}, {'name': 'int64_1', 'description': '', 'type': <DataType.INT64: 5>},
{'name': 'double_1', 'description': '', 'type': <DataType.DOUBLE: 11>}, {'name': 'float_1', 'description': '', 'type': <DataType.FLOAT: 10>},
{'name': 'varchar_1', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 100}}, {'name': 'bool_1', 'description': '', 'type': <DataType.BOOL: 1>},
{'name': 'json_1', 'description': '', 'type': <DataType.JSON: 23>}, {'name': 'array_int8_1', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_capacity': 11}, 'element_type': <DataType.INT8: 2>},
{'name': 'array_int16_1', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_capacity': 11}, 'element_type': <DataType.INT16: 3>},
{'name': 'array_int32_1', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_capacity': 11}, 'element_type': <DataType.INT32: 4>},
{'name': 'array_int64_1', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_capacity': 11}, 'element_type': <DataType.INT64: 5>},
{'name': 'array_double_1', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_capacity': 11}, 'element_type': <DataType.DOUBLE: 11>},
{'name': 'array_float_1', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_capacity': 11}, 'element_type': <DataType.FLOAT: 10>},
{'name': 'array_varchar_1', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_length': 100, 'max_capacity': 11}, 'element_type': <DataType.VARCHAR: 21>},
{'name': 'array_bool_1', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_capacity': 11}, 'element_type': <DataType.BOOL: 1>}, {'name': 'int8_2', 'description': '', 'type': <DataType.INT8: 2>},
{'name': 'int16_2', 'description': '', 'type': <DataType.INT16: 3>}, {'name': 'int32_2', 'description': '', 'type': <DataType.INT32: 4>}, {'name': 'int64_2', 'description': '', 'type': <DataType.INT64: 5>},
{'name': 'double_2', 'description': '', 'type': <DataType.DOUBLE: 11>}, {'name': 'float_2', 'description': '', 'type': <DataType.FLOAT: 10>},
{'name': 'varchar_2', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 100}}, {'name': 'bool_2', 'description': '', 'type': <DataType.BOOL: 1>},
{'name': 'json_2', 'description': '', 'type': <DataType.JSON: 23>}, {'name': 'array_int8_2', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_capacity': 11}, 'element_type': <DataType.INT8: 2>},
{'name': 'array_int16_2', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_capacity': 11}, 'element_type': <DataType.INT16: 3>},
{'name': 'array_int32_2', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_capacity': 11}, 'element_type': <DataType.INT32: 4>},
{'name': 'array_int64_2', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_capacity': 11}, 'element_type': <DataType.INT64: 5>},
{'name': 'array_double_2', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_capacity': 11}, 'element_type': <DataType.DOUBLE: 11>},
{'name': 'array_float_2', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_capacity': 11}, 'element_type': <DataType.FLOAT: 10>},
{'name': 'array_varchar_2', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_length': 100, 'max_capacity': 11}, 'element_type': <DataType.VARCHAR: 21>},
{'name': 'array_bool_2', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_capacity': 11}, 'element_type': <DataType.BOOL: 1>}],
'enable_dynamic_field': False}
Expected Behavior
No response
Steps To Reproduce
concurrent test and calculation of RT and QPS
:purpose: `primary key: INT64`, shard_num=1
1. build a `BITMAP` index on the INT64 primary key and on all 12 scalar fields that support it
2. build `INVERTED`, `Trie`, and `STL_SORT` indexes on the other 22 scalar fields
3. 2 fields of different vector types
4. search for different expressions on BITMAP index fields
:test steps:
1. create collection with fields:
'float_vector': 128dim
'sparse_float_vector': sparse_range=[1, 100] <- the range of non-zero values of a sparse vector
'id': primary key type is INT64
all scalar fields: varchar max_length=100, array max_capacity=11
2. build indexes:
IVF_SQ8: 'float_vector'
SPARSE_WAND: 'sparse_float_vector'
BITMAP: 'id', '*_1' all supported field names
INVERTED: 'array_float_1', 'array_double_1', 'float_2', 'double_2', 'bool_2', 'array_int8_2',
'array_int16_2', 'array_int32_2', 'array_int64_2', 'array_varchar_2', 'array_bool_2',
'array_float_2', 'array_double_2'
Trie: 'varchar_2'
STL_SORT: 'float_1', 'double_1', 'int8_2', 'int16_2', 'int32_2', 'int64_2'
3. insert 5 million rows of data
4. flush collection
5. build indexes again using the same params <- building the Trie index gets stuck at this step
Milvus Log
No response
Anything else?
client config:
{
"dataset_params": {
"metric_type": "L2",
"dim": 128,
"max_length": 100,
"scalars_index": {
"int8_1": {
"index_type": "BITMAP"
},
"int16_1": {
"index_type": "BITMAP"
},
"int32_1": {
"index_type": "BITMAP"
},
"int64_1": {
"index_type": "BITMAP"
},
"varchar_1": {
"index_type": "BITMAP"
},
"bool_1": {
"index_type": "BITMAP"
},
"array_int8_1": {
"index_type": "BITMAP"
},
"array_int16_1": {
"index_type": "BITMAP"
},
"array_int32_1": {
"index_type": "BITMAP"
},
"array_int64_1": {
"index_type": "BITMAP"
},
"array_varchar_1": {
"index_type": "BITMAP"
},
"array_bool_1": {
"index_type": "BITMAP"
},
"array_float_1": {
"index_type": "INVERTED"
},
"array_double_1": {
"index_type": "INVERTED"
},
"float_2": {
"index_type": "INVERTED"
},
"double_2": {
"index_type": "INVERTED"
},
"bool_2": {
"index_type": "INVERTED"
},
"array_int8_2": {
"index_type": "INVERTED"
},
"array_int16_2": {
"index_type": "INVERTED"
},
"array_int32_2": {
"index_type": "INVERTED"
},
"array_int64_2": {
"index_type": "INVERTED"
},
"array_varchar_2": {
"index_type": "INVERTED"
},
"array_bool_2": {
"index_type": "INVERTED"
},
"array_float_2": {
"index_type": "INVERTED"
},
"array_double_2": {
"index_type": "INVERTED"
},
"varchar_2": {
"index_type": "Trie"
},
"float_1": {
"index_type": "STL_SORT"
},
"double_1": {
"index_type": "STL_SORT"
},
"int8_2": {
"index_type": "STL_SORT"
},
"int16_2": {
"index_type": "STL_SORT"
},
"int32_2": {
"index_type": "STL_SORT"
},
"int64_2": {
"index_type": "STL_SORT"
}
},
"vectors_index": {
"sparse_float_vector": {
"index_type": "SPARSE_INVERTED_INDEX",
"index_param": {
"drop_ratio_build": 0.2
},
"metric_type": "IP"
}
},
"scalars_params": {
"array_int8_1": {
"params": {
"max_capacity": 11
},
"other_params": {
"dataset": "random_algorithm",
"algorithm_params": {
"algorithm_name": "random_range",
"specify_range": [
-2500,
2500
],
"max_capacity": 9
}
}
},
"array_int16_1": {
"params": {
"max_capacity": 11
},
"other_params": {
"dataset": "random_algorithm",
"algorithm_params": {
"algorithm_name": "random_range",
"specify_range": [
-2500,
2500
],
"max_capacity": 9
}
}
},
"array_int32_1": {
"params": {
"max_capacity": 11
},
"other_params": {
"dataset": "random_algorithm",
"algorithm_params": {
"algorithm_name": "random_range",
"specify_range": [
-2500,
2500
],
"max_capacity": 9
}
}
},
"array_int64_1": {
"params": {
"max_capacity": 11
},
"other_params": {
"dataset": "random_algorithm",
"algorithm_params": {
"algorithm_name": "random_range",
"specify_range": [
-2500,
2500
],
"max_capacity": 9
}
}
},
"array_double_1": {
"params": {
"max_capacity": 11
}
},
"array_float_1": {
"params": {
"max_capacity": 11
}
},
"array_varchar_1": {
"params": {
"max_capacity": 11
},
"other_params": {
"dataset": "random_algorithm",
"algorithm_params": {
"algorithm_name": "random_range",
"specify_range": [
-2500,
2500
],
"max_capacity": 9
}
}
},
"array_bool_1": {
"params": {
"max_capacity": 11
},
"other_params": {
"dataset": "random_algorithm",
"algorithm_params": {
"algorithm_name": "random_range",
"specify_range": [
-2500,
2500
],
"max_capacity": 9
}
}
},
"array_int8_2": {
"params": {
"max_capacity": 11
}
},
"array_int16_2": {
"params": {
"max_capacity": 11
}
},
"array_int32_2": {
"params": {
"max_capacity": 11
}
},
"array_int64_2": {
"params": {
"max_capacity": 11
}
},
"array_double_2": {
"params": {
"max_capacity": 11
}
},
"array_float_2": {
"params": {
"max_capacity": 11
}
},
"array_varchar_2": {
"params": {
"max_capacity": 11
}
},
"array_bool_2": {
"params": {
"max_capacity": 11
}
},
"int8_1": {
"other_params": {
"dataset": "random_algorithm",
"algorithm_params": {
"algorithm_name": "random_range",
"specify_range": [
-2500,
2500
],
"max_capacity": 9
}
}
},
"int16_1": {
"other_params": {
"dataset": "random_algorithm",
"algorithm_params": {
"algorithm_name": "random_range",
"specify_range": [
-2500,
2500
],
"max_capacity": 9
}
}
},
"int32_1": {
"other_params": {
"dataset": "random_algorithm",
"algorithm_params": {
"algorithm_name": "random_range",
"specify_range": [
-2500,
2500
],
"max_capacity": 9
}
}
},
"int64_1": {
"other_params": {
"dataset": "random_algorithm",
"algorithm_params": {
"algorithm_name": "random_range",
"specify_range": [
-2500,
2500
],
"max_capacity": 9
}
}
},
"varchar_1": {
"other_params": {
"dataset": "random_algorithm",
"algorithm_params": {
"algorithm_name": "random_range",
"specify_range": [
-2500,
2500
],
"max_capacity": 9
}
}
},
"bool_1": {
"other_params": {
"dataset": "random_algorithm",
"algorithm_params": {
"algorithm_name": "random_range",
"specify_range": [
-2500,
2500
],
"max_capacity": 9
}
}
}
},
"dataset_name": "sift",
"dataset_size": 5000000,
"ni_per": 5000
},
"collection_params": {
"other_fields": [
"sparse_float_vector",
"int8_1",
"int16_1",
"int32_1",
"int64_1",
"double_1",
"float_1",
"varchar_1",
"bool_1",
"json_1",
"array_int8_1",
"array_int16_1",
"array_int32_1",
"array_int64_1",
"array_double_1",
"array_float_1",
"array_varchar_1",
"array_bool_1",
"int8_2",
"int16_2",
"int32_2",
"int64_2",
"double_2",
"float_2",
"varchar_2",
"bool_2",
"json_2",
"array_int8_2",
"array_int16_2",
"array_int32_2",
"array_int64_2",
"array_double_2",
"array_float_2",
"array_varchar_2",
"array_bool_2"
],
"shards_num": 1
},
"load_params": {
"replica_number": 2
},
"resource_groups_params": {
"reset": false
},
"database_user_params": {
"reset_rbac": false,
"reset_db": false
},
"index_params": {
"index_type": "IVF_SQ8",
"index_param": {
"nlist": 1024
}
},
"concurrent_params": {
"concurrent_number": [
1
],
"during_time": "24h",
"interval": 20
},
"concurrent_tasks": [
{
"type": "search",
"weight": 1,
"params": {
"nq": 1000,
"top_k": 10,
"search_param": {
"nprobe": 16
},
"expr": "id >= 100",
"guarantee_timestamp": null,
"partition_names": null,
"output_fields": null,
"ignore_growing": false,
"group_by_field": null,
"timeout": null,
"random_data": true,
"check_task": "check_search_output",
"check_items": null
}
},
{
"type": "query",
"weight": 1,
"params": {
"ids": null,
"expr": "id > -1 && ",
"output_fields": null,
"offset": null,
"limit": null,
"ignore_growing": false,
"partition_names": null,
"timeout": null,
"random_data": true,
"random_count": 10,
"random_range": [
0,
5000000
],
"field_name": "id",
"field_type": "int64",
"check_task": "check_query_output",
"check_items": null
}
},
{
"type": "hybrid_search",
"weight": 1,
"params": {
"nq": 10,
"top_k": 10,
"reqs": [
{
"search_param": {
"nprobe": 128
},
"anns_field": "float_vector",
"expr": "(9 ** 2) < float_1",
"top_k": 100
},
{
"search_param": {
"nprobe": 128
},
"anns_field": "float_vector",
"expr": "(int16_1 / 100) <= 100",
"top_k": 100
},
{
"search_param": {
"nprobe": 128
},
"anns_field": "float_vector",
"expr": "!(int32_1 != int16_1)",
"top_k": 100
},
{
"search_param": {
"nprobe": 128
},
"anns_field": "float_vector",
"expr": "not (int64_1 == int8_1)",
"top_k": 100
},
{
"search_param": {
"nprobe": 128
},
"anns_field": "float_vector",
"expr": "(exists json_1['id']) && (varchar_1 like \"1%\")",
"top_k": 100
},
{
"search_param": {
"nprobe": 128
},
"anns_field": "float_vector",
"expr": "!(EXISTS json_2['id']) and (bool_1 == true)",
"top_k": 100
},
{
"search_param": {
"nprobe": 128
},
"anns_field": "float_vector",
"expr": "array_length(array_int8_1) == 11",
"top_k": 100
},
{
"search_param": {
"nprobe": 128
},
"anns_field": "float_vector",
"expr": "ARRAY_LENGTH(array_int16_1) != 11",
"top_k": 100
},
{
"search_param": {
"nprobe": 128
},
"anns_field": "float_vector",
"expr": "(array_contains_any(array_int32_1, [0]) || array_contains(array_int32_1, 1)) || ((bool_1 == True) and (bool_2 == TRUE))",
"top_k": 100
},
{
"search_param": {
"nprobe": 128
},
"anns_field": "float_vector",
"expr": "ARRAY_CONTAINS_ANY(array_int64_1, [-2500]) or array_contains_all(array_int64_1, [-1, 1])",
"top_k": 100
},
{
"search_param": {
"nprobe": 128
},
"anns_field": "float_vector",
"expr": "ARRAY_CONTAINS_ALL(array_varchar_1, [\"-2\", \"-1\"]) || ARRAY_CONTAINS(array_varchar_1, \"0\")",
"top_k": 100
},
{
"search_param": {
"nprobe": 128
},
"anns_field": "float_vector",
"expr": "array_length(array_bool_1) == (11 * 11 / 11)",
"top_k": 100
},
{
"search_param": {
"drop_ratio_search": 0.1
},
"anns_field": "sparse_float_vector",
"expr": "(int32_1 % 100) <= 50",
"top_k": 30
},
{
"search_param": {
"drop_ratio_search": 0.1
},
"anns_field": "sparse_float_vector",
"expr": "(varchar_1 like \"1%\") && (bool_1 == True)"
}
],
"rerank": {
"RRFRanker": []
},
"output_fields": null,
"ignore_growing": false,
"guarantee_timestamp": null,
"partition_names": null,
"timeout": null,
"random_data": true,
"check_task": "check_search_output",
"check_items": null
}
}
]
}