Skip to content

Commit

Permalink
test: update import test cases to support fields with different vector dimensions (milvus-io#33709)
Browse files (browse the repository at this point in the history)
Add a test case for milvus-io#33681.

---------

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
  • Loading branch information
zhuwenxing authored Jun 13, 2024
1 parent 144ee26 commit ca1f7ab
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 61 deletions.
30 changes: 25 additions & 5 deletions tests/python_client/common/bulk_insert_data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -491,10 +491,16 @@ def gen_sparse_vectors(rows, sparse_format="dok"):
return vectors


def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128, array_length=None, sparse_format="dok"):
def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128, array_length=None, sparse_format="dok", **kwargs):
if array_length is None:
array_length = random.randint(0, 10)

schema = kwargs.get("schema", None)
schema = schema.to_dict() if schema is not None else None
if schema is not None:
fields = schema.get("fields", [])
for field in fields:
if data_field == field["name"] and "params" in field:
dim = field["params"].get("dim", dim)
data = []
if rows > 0:
if "vec" in data_field:
Expand Down Expand Up @@ -618,10 +624,18 @@ def gen_json_files(is_row_based, rows, dim, auto_id, str_pk,


def gen_dict_data_by_data_field(data_fields, rows, start=0, float_vector=True, dim=128, array_length=None, enable_dynamic_field=False, **kwargs):
schema = kwargs.get("schema", None)
schema = schema.to_dict() if schema is not None else None
data = []
for r in range(rows):
d = {}
for data_field in data_fields:
if schema is not None:
fields = schema.get("fields", [])
for field in fields:
if data_field == field["name"] and "params" in field:
dim = field["params"].get("dim", dim)

if "vec" in data_field:
if "float" in data_field:
float_vector = True
Expand Down Expand Up @@ -718,19 +732,24 @@ def gen_new_json_files(float_vector, rows, dim, data_fields, file_nums=1, array_
def gen_npy_files(float_vector, rows, dim, data_fields, file_size=None, file_nums=1, err_type="", force=False, enable_dynamic_field=False, include_meta=True, **kwargs):
# gen numpy files
schema = kwargs.get("schema", None)
schema = schema.to_dict() if schema is not None else None
u_id = f"numpy-{uuid.uuid4()}"
data_source_new = f"{data_source}/{u_id}"
schema_file = f"{data_source_new}/schema.json"
Path(schema_file).parent.mkdir(parents=True, exist_ok=True)
if schema is not None:
data = schema.to_dict()
with open(schema_file, "w") as f:
json.dump(data, f)
json.dump(schema, f)
files = []
start_uid = 0
if file_nums == 1:
# gen the numpy file without subfolders if only one set of files
for data_field in data_fields:
if schema is not None:
fields = schema.get("fields", [])
for field in fields:
if data_field == field["name"] and "params" in field:
dim = field["params"].get("dim", dim)
if "vec" in data_field:
vector_type = "float32"
if "float" in data_field:
Expand All @@ -745,6 +764,7 @@ def gen_npy_files(float_vector, rows, dim, data_fields, file_size=None, file_num
if "fp16" in data_field:
float_vector = True
vector_type = "fp16"

file_name = gen_vectors_in_numpy_file(dir=data_source_new, data_field=data_field, float_vector=float_vector,
vector_type=vector_type, rows=rows, dim=dim, force=force)
elif data_field == DataField.string_field: # string field for numpy not supported yet at 2022-10-17
Expand Down Expand Up @@ -830,7 +850,7 @@ def gen_parquet_files(float_vector, rows, dim, data_fields, file_size=None, row_
all_field_data = {}
for data_field in data_fields:
data = gen_data_by_data_field(data_field=data_field, rows=rows, start=0,
float_vector=float_vector, dim=dim, array_length=array_length, sparse_format=sparse_format)
float_vector=float_vector, dim=dim, array_length=array_length, sparse_format=sparse_format, **kwargs)
all_field_data[data_field] = data
if enable_dynamic_field and include_meta:
all_field_data["$meta"] = gen_dynamic_field_data_in_parquet_file(rows=rows, start=0)
Expand Down
2 changes: 1 addition & 1 deletion tests/python_client/pytest.ini
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
[pytest]


addopts = --host localhost --html=/tmp/ci_logs/report.html --self-contained-html -v --log-cli-level=INFO --capture=no
addopts = --host localhost --html=/tmp/ci_logs/report.html --self-contained-html -v
# python3 -W ignore -m pytest

log_format = [%(asctime)s - %(levelname)s - %(name)s]: %(message)s (%(filename)s:%(lineno)s)
Expand Down
Loading

0 comments on commit ca1f7ab

Please sign in to comment.