diff --git a/tests/python_client/check/func_check.py b/tests/python_client/check/func_check.py
index 2347004950c6f..dfb339aad6eb0 100644
--- a/tests/python_client/check/func_check.py
+++ b/tests/python_client/check/func_check.py
@@ -121,9 +121,13 @@ def assert_exception(self, res, actual=True, error_dict=None):
         assert len(error_dict) > 0
         if isinstance(res, Error):
             error_code = error_dict[ct.err_code]
-            assert res.code == error_code or error_dict[ct.err_msg] in res.message, (
+            # assert res.code == error_code or error_dict[ct.err_msg] in res.message, (
+            #     f"Response of API {self.func_name} "
+            #     f"expect get error code {error_dict[ct.err_code]} or error message {error_dict[ct.err_code]}, "
+            #     f"but got {res.code} {res.message}")
+            assert error_dict[ct.err_msg] in res.message, (
                 f"Response of API {self.func_name} "
-                f"expect get error code {error_dict[ct.err_code]} or error message {error_dict[ct.err_code]}, "
+                f"expect to get error message {error_dict[ct.err_msg]}, "
                 f"but got {res.code} {res.message}")
         else:
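Reviewer note, illustrative only and not part of the patch: the rewritten checker now matches solely on the expected message substring and ignores the numeric error code. A minimal sketch of the new matching rule, using a hypothetical expected-error dict:

    # hypothetical expected-error dict in the shape the test cases pass in
    error = {ct.err_code: 999, ct.err_msg: "collection not found"}
    # the check now passes whenever the expected substring appears in the
    # response message, regardless of what res.code is
    assert error[ct.err_msg] in res.message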
diff --git a/tests/python_client/common/code_mapping.py b/tests/python_client/common/code_mapping.py
index e2d965282d073..355cb3baeeda3 100644
--- a/tests/python_client/common/code_mapping.py
+++ b/tests/python_client/common/code_mapping.py
@@ -34,9 +34,10 @@ class IndexErrorMessage(ExceptionsMessage):
     WrongFieldName = "cannot create index on non-vector field: %s"
     DropLoadedIndex = "index cannot be dropped, collection is loaded, please release it first"
     CheckVectorIndex = "data type {0} can't build with this index {1}"
-    SparseFloatVectorMetricType = "only IP is the supported metric type for sparse index"
+    SparseFloatVectorMetricType = "only IP&BM25 is the supported metric type for sparse index"
     VectorMetricTypeExist = "metric type not set for vector index"
-    CheckBitmapIndex = "bitmap index are only supported on bool, int, string and array field"
+    # please update the msg below when #37543 is fixed
+    CheckBitmapIndex = "bitmap index are only supported on bool, int, string"
     CheckBitmapOnPK = "create bitmap index on primary key not supported"
     CheckBitmapCardinality = "failed to check bitmap cardinality limit, should be larger than 0 and smaller than 1000"
     NotConfigable = "{0} is not configable index param"
diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py
index 3b9553fb783b7..f8e3722be3747 100644
--- a/tests/python_client/common/common_func.py
+++ b/tests/python_client/common/common_func.py
@@ -2227,7 +2227,7 @@ def gen_invalid_search_params_type():
         if index_type == "FLAT":
             continue
         # search_params.append({"index_type": index_type, "search_params": {"invalid_key": invalid_search_key}})
-        if index_type in ["IVF_FLAT", "IVF_SQ8", "IVF_PQ"]:
+        if index_type in ["IVF_FLAT", "IVF_SQ8", "IVF_PQ", "BIN_FLAT", "BIN_IVF_FLAT"]:
             for nprobe in ct.get_invalid_ints:
                 ivf_search_params = {"index_type": index_type, "search_params": {"nprobe": nprobe}}
                 search_params.append(ivf_search_params)
@@ -2307,35 +2307,6 @@ def gen_autoindex_search_params():
     return search_params
 
 
-def gen_invalid_search_param(index_type, metric_type="L2"):
-    search_params = []
-    if index_type in ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ"] \
-            or index_type in ["BIN_FLAT", "BIN_IVF_FLAT"]:
-        for nprobe in [-1]:
-            ivf_search_params = {"metric_type": metric_type, "params": {"nprobe": nprobe}}
-            search_params.append(ivf_search_params)
-    elif index_type in ["HNSW"]:
-        for ef in [-1]:
-            hnsw_search_param = {"metric_type": metric_type, "params": {"ef": ef}}
-            search_params.append(hnsw_search_param)
-    elif index_type == "ANNOY":
-        for search_k in ["-2"]:
-            annoy_search_param = {"metric_type": metric_type, "params": {"search_k": search_k}}
-            search_params.append(annoy_search_param)
-    elif index_type == "DISKANN":
-        for search_list in ["-1"]:
-            diskann_search_param = {"metric_type": metric_type, "params": {"search_list": search_list}}
-            search_params.append(diskann_search_param)
-    elif index_type == "SCANN":
-        for reorder_k in [-1]:
-            scann_search_param = {"metric_type": metric_type, "params": {"reorder_k": reorder_k, "nprobe": 10}}
-            search_params.append(scann_search_param)
-    else:
-        log.error("Invalid index_type.")
-        raise Exception("Invalid index_type.")
-    return search_params
-
-
 def gen_all_type_fields():
     fields = []
     for k, v in DataType.__members__.items():
@@ -2345,49 +2316,98 @@ def gen_all_type_fields():
     return fields
 
 
-def gen_normal_expressions():
+def gen_normal_expressions_and_templates():
+    """
+    Gen a list of filters in expression format (as a string) and template format (as a dict).
+    The two formats are equivalent to each other.
+    """
     expressions = [
-        "",
-        "int64 > 0",
-        "(int64 > 0 && int64 < 400) or (int64 > 500 && int64 < 1000)",
-        "int64 not in [1, 2, 3]",
-        "int64 in [1, 2, 3] and float != 2",
-        "int64 == 0 || float == 10**2 || (int64 + 1) == 3",
-        "0 <= int64 < 400 and int64 % 100 == 0",
-        "200+300 < int64 <= 500+500",
-        "int64 > 400 && int64 < 200",
-        "int64 in [300/2, 900%40, -10*30+800, (100+200)*2] or float in [+3**6, 2**10/2]",
-        "float <= -4**5/2 && float > 500-1 && float != 500/2+260"
+        ["", {"expr": "", "expr_params": {}}],
+        ["int64 > 0", {"expr": "int64 > {value_0}", "expr_params": {"value_0": 0}}],
+        ["(int64 > 0 && int64 < 400) or (int64 > 500 && int64 < 1000)",
+         {"expr": "(int64 > {value_0} && int64 < {value_1}) or (int64 > {value_2} && int64 < {value_3})",
+          "expr_params": {"value_0": 0, "value_1": 400, "value_2": 500, "value_3": 1000}}],
+        ["int64 not in [1, 2, 3]", {"expr": "int64 not in {value_0}", "expr_params": {"value_0": [1, 2, 3]}}],
+        ["int64 in [1, 2, 3] and float != 2", {"expr": "int64 in {value_0} and float != {value_1}",
+                                               "expr_params": {"value_0": [1, 2, 3], "value_1": 2}}],
+        ["int64 == 0 || float == 10**2 || (int64 + 1) == 3",
+         {"expr": "int64 == {value_0} || float == {value_1} || (int64 + {value_2}) == {value_3}",
+          "expr_params": {"value_0": 0, "value_1": 10**2, "value_2": 1, "value_3": 3}}],
+        ["0 <= int64 < 400 and int64 % 100 == 0",
+         {"expr": "{value_0} <= int64 < {value_1} and int64 % {value_2} == {value_0}",
+          "expr_params": {"value_0": 0, "value_1": 400, "value_2": 100}}],
+        ["200+300 < int64 <= 500+500", {"expr": "{value_0} < int64 <= {value_1}",
+                                        "expr_params": {"value_1": 500+500, "value_0": 200+300}}],
+        ["int64 > 400 && int64 < 200", {"expr": "int64 > {value_0} && int64 < {value_1}",
+                                        "expr_params": {"value_0": 400, "value_1": 200}}],
+        ["int64 in [300/2, 900%40, -10*30+800, (100+200)*2] or float in [+3**6, 2**10/2]",
+         {"expr": "int64 in {value_0} or float in {value_1}",
+          "expr_params": {"value_0": [int(300/2), 900%40, -10*30+800, (100+200)*2], "value_1": [+3**6*1.0, 2**10/2*1.0]}}],
+        ["float <= -4**5/2 && float > 500-1 && float != 500/2+260",
+         {"expr": "float <= {value_0} && float > {value_1} && float != {value_2}",
+          "expr_params": {"value_0": -4**5/2, "value_1": 500-1, "value_2": 500/2+260}}],
     ]
     return expressions
 
 
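Illustrative note, not part of the patch: each pair above keeps the plain expression and its template form in sync, with {value_N} placeholders bound from expr_params. A quick sketch of the intended equivalence, approximating the server-side substitution with Python's str.format:

    expr = "int64 not in [1, 2, 3]"
    template = {"expr": "int64 not in {value_0}", "expr_params": {"value_0": [1, 2, 3]}}
    # str.format renders the list as "[1, 2, 3]", reproducing the plain expression
    assert template["expr"].format(**template["expr_params"]) == expr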
-def gen_json_field_expressions():
+def gen_json_field_expressions_and_templates():
+    """
+    Gen a list of filters in expression format (as a string) and template format (as a dict).
+    The two formats are equivalent to each other.
+    """
     expressions = [
-        "json_field['number'] > 0",
-        "0 <= json_field['number'] < 400 or 1000 > json_field['number'] >= 500",
-        "json_field['number'] not in [1, 2, 3]",
-        "json_field['number'] in [1, 2, 3] and json_field['float'] != 2",
-        "json_field['number'] == 0 || json_field['float'] == 10**2 || json_field['number'] + 1 == 3",
-        "json_field['number'] < 400 and json_field['number'] >= 100 and json_field['number'] % 100 == 0",
-        "json_field['float'] > 400 && json_field['float'] < 200",
-        "json_field['number'] in [300/2, -10*30+800, (100+200)*2] or json_field['float'] in [+3**6, 2**10/2]",
-        "json_field['float'] <= -4**5/2 && json_field['float'] > 500-1 && json_field['float'] != 500/2+260"
+        ["json_field['number'] > 0", {"expr": "json_field['number'] > {value_0}", "expr_params": {"value_0": 0}}],
+        ["0 <= json_field['number'] < 400 or 1000 > json_field['number'] >= 500",
+         {"expr": "{value_0} <= json_field['number'] < {value_1} or {value_2} > json_field['number'] >= {value_3}",
+          "expr_params": {"value_0": 0, "value_1": 400, "value_2": 1000, "value_3": 500}}],
+        ["json_field['number'] not in [1, 2, 3]", {"expr": "json_field['number'] not in {value_0}",
+                                                   "expr_params": {"value_0": [1, 2, 3]}}],
+        ["json_field['number'] in [1, 2, 3] and json_field['float'] != 2",
+         {"expr": "json_field['number'] in {value_0} and json_field['float'] != {value_1}",
+          "expr_params": {"value_0": [1, 2, 3], "value_1": 2}}],
+        ["json_field['number'] == 0 || json_field['float'] == 10**2 || json_field['number'] + 1 == 3",
+         {"expr": "json_field['number'] == {value_0} || json_field['float'] == {value_1} || json_field['number'] + {value_2} == {value_3}",
+          "expr_params": {"value_0": 0, "value_1": 10**2, "value_2": 1, "value_3": 3}}],
+        ["json_field['number'] < 400 and json_field['number'] >= 100 and json_field['number'] % 100 == 0",
+         {"expr": "json_field['number'] < {value_0} and json_field['number'] >= {value_1} and json_field['number'] % {value_1} == 0",
+          "expr_params": {"value_0": 400, "value_1": 100}}],
+        ["json_field['float'] > 400 && json_field['float'] < 200",
+         {"expr": "json_field['float'] > {value_0} && json_field['float'] < {value_1}",
+          "expr_params": {"value_0": 400, "value_1": 200}}],
+        ["json_field['number'] in [300/2, -10*30+800, (100+200)*2] or json_field['float'] in [+3**6, 2**10/2]",
+         {"expr": "json_field['number'] in {value_0} or json_field['float'] in {value_1}",
+          "expr_params": {"value_0": [int(300/2), -10*30+800, (100+200)*2], "value_1": [+3**6*1.0, 2**10/2*1.0]}}],
+        ["json_field['float'] <= -4**5/2 && json_field['float'] > 500-1 && json_field['float'] != 500/2+260",
+         {"expr": "json_field['float'] <= {value_0} && json_field['float'] > {value_1} && json_field['float'] != {value_2}",
+          "expr_params": {"value_0": -4**5/2, "value_1": 500-1, "value_2": 500/2+260}}],
     ]
     return expressions
 
 
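Illustrative note, not part of the patch: a sketch of how a test might consume one generated pair. The client, collection_name, and the filter_params keyword are assumptions here (filter_params is taken to be how pymilvus accepts expression-template parameters):

    expression, template = gen_json_field_expressions_and_templates()[0]
    # string form
    res1 = client.query(collection_name, filter=expression)
    # template form: the {value_N} placeholders are resolved server-side
    # from the parameters dict
    res2 = client.query(collection_name, filter=template["expr"],
                        filter_params=template["expr_params"])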
+ """ expressions = [ - "int32_array[0] > 0", - "0 <= int32_array[0] < 400 or 1000 > float_array[1] >= 500", - "int32_array[1] not in [1, 2, 3]", - "int32_array[1] in [1, 2, 3] and string_array[1] != '2'", - "int32_array == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]", - "int32_array[1] + 1 == 3 && int32_array[0] - 1 != 1", - "int32_array[1] % 100 == 0 && string_array[1] in ['1', '2']", - "int32_array[1] in [300/2, -10*30+800, (200-100)*2] " - "or (float_array[1] <= -4**5/2 || 100 <= int32_array[1] < 200)" + ["int32_array[0] > 0", {"expr": "int32_array[0] > {value_0}", "expr_params": {"value_0": 0}}], + ["0 <= int32_array[0] < 400 or 1000 > float_array[1] >= 500", + {"expr": "{value_0} <= int32_array[0] < {value_1} or {value_2} > float_array[1] >= {value_3}", + "expr_params": {"value_0": 0, "value_1": 400, "value_2": 1000, "value_3": 500}}], + ["int32_array[1] not in [1, 2, 3]", {"expr": "int32_array[1] not in {value_0}", "expr_params": {"value_0": [1, 2, 3]}}], + ["int32_array[1] in [1, 2, 3] and string_array[1] != '2'", + {"expr": "int32_array[1] in {value_0} and string_array[1] != {value_2}", + "expr_params": {"value_0": [1, 2, 3], "value_2": "2"}}], + ["int32_array == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]", {"expr": "int32_array == {value_0}", + "expr_params": {"value_0": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}}], + ["int32_array[1] + 1 == 3 && int32_array[0] - 1 != 1", + {"expr": "int32_array[1] + {value_0} == {value_2} && int32_array[0] - {value_0} != {value_0}", + "expr_params": {"value_0": 1, "value_2": 3}}], + ["int32_array[1] % 100 == 0 && string_array[1] in ['1', '2']", + {"expr": "int32_array[1] % {value_0} == {value_1} && string_array[1] in {value_2}", + "expr_params": {"value_0": 100, "value_1": 0, "value_2": ["1", "2"]}}], + ["int32_array[1] in [300/2, -10*30+800, (200-100)*2] or (float_array[1] <= -4**5/2 || 100 <= int32_array[1] < 200)", + {"expr": "int32_array[1] in {value_0} or (float_array[1] <= {value_1} || {value_2} <= int32_array[1] < {value_3})", + "expr_params": {"value_0": [int(300/2), -10*30+800, (200-100)*2], "value_1": -4**5/2, "value_2": 100, "value_3": 200}}] ] return expressions @@ -2437,37 +2457,42 @@ def gen_invalid_string_expressions(): return expressions -def gen_invalid_bool_expressions(): - expressions = [ - "bool", - "!bool", - "true", - "false", - "int64 > 0 and bool", - "int64 > 0 or false" +def gen_normal_expressions_and_templates_field(field): + """ + Gen a list of filter in expression-format(as a string) and template-format(as a dict) for a field. + The two formats equals to each other. 
+ """ + expressions_and_templates = [ + ["", {"expr": "", "expr_params": {}}], + [f"{field} > 0", {"expr": f"{field} > {{value_0}}", "expr_params": {"value_0": 0}}], + [f"({field} > 0 && {field} < 400) or ({field} > 500 && {field} < 1000)", + {"expr": f"({field} > {{value_0}} && {field} < {{value_1}}) or ({field} > {{value_2}} && {field} < {{value_3}})", + "expr_params": {"value_0": 0, "value_1": 400, "value_2": 500, "value_3": 1000}}], + [f"{field} not in [1, 2, 3]", {"expr": f"{field} not in {{value_0}}", "expr_params": {"value_0": [1, 2, 3]}}], + [f"{field} in [1, 2, 3] and {field} != 2", {"expr": f"{field} in {{value_0}} and {field} != {{value_1}}", "expr_params": {"value_0": [1, 2, 3], "value_1": 2}}], + [f"{field} == 0 || {field} == 1 || {field} == 2", {"expr": f"{field} == {{value_0}} || {field} == {{value_1}} || {field} == {{value_2}}", + "expr_params": {"value_0": 0, "value_1": 1, "value_2": 2}}], + [f"0 < {field} < 400", {"expr": f"{{value_0}} < {field} < {{value_1}}", "expr_params": {"value_0": 0, "value_1": 400}}], + [f"500 <= {field} <= 1000", {"expr": f"{{value_0}} <= {field} <= {{value_1}}", "expr_params": {"value_0": 500, "value_1": 1000}}], + [f"200+300 <= {field} <= 500+500", {"expr": f"{{value_0}} <= {field} <= {{value_1}}", "expr_params": {"value_0": 200+300, "value_1": 500+500}}], + [f"{field} in [300/2, 900%40, -10*30+800, 2048/2%200, (100+200)*2]", {"expr": f"{field} in {{value_0}}", "expr_params": {"value_0": [300*1.0/2, 900*1.0%40, -10*30*1.0+800, 2048*1.0/2%200, (100+200)*1.0*2]}}], + [f"{field} in [+3**6, 2**10/2]", {"expr": f"{field} in {{value_0}}", "expr_params": {"value_0": [+3**6*1.0, 2**10*1.0/2]}}], + [f"{field} <= 4**5/2 && {field} > 500-1 && {field} != 500/2+260", {"expr": f"{field} <= {{value_0}} && {field} > {{value_1}} && {field} != {{value_2}}", + "expr_params": {"value_0": 4**5/2, "value_1": 500-1, "value_2": 500/2+260}}], + [f"{field} > 400 && {field} < 200", {"expr": f"{field} > {{value_0}} && {field} < {{value_1}}", "expr_params": {"value_0": 400, "value_1": 200}}], + [f"{field} < -2**8", {"expr": f"{field} < {{value_0}}", "expr_params": {"value_0": -2**8}}], + [f"({field} + 1) == 3 || {field} * 2 == 64 || {field} == 10**2", {"expr": f"({field} + {{value_0}}) == {{value_1}} || {field} * {{value_2}} == {{value_3}} || {field} == {{value_4}}", + "expr_params": {"value_0": 1, "value_1": 3, "value_2": 2, "value_3": 64, "value_4": 10**2}}] ] - return expressions + return expressions_and_templates -def gen_normal_expressions_field(field): - expressions = [ - "", - f"{field} > 0", - f"({field} > 0 && {field} < 400) or ({field} > 500 && {field} < 1000)", - f"{field} not in [1, 2, 3]", - f"{field} in [1, 2, 3] and {field} != 2", - f"{field} == 0 || {field} == 1 || {field} == 2", - f"0 < {field} < 400", - f"500 <= {field} <= 1000", - f"200+300 <= {field} <= 500+500", - f"{field} in [300/2, 900%40, -10*30+800, 2048/2%200, (100+200)*2]", - f"{field} in [+3**6, 2**10/2]", - f"{field} <= 4**5/2 && {field} > 500-1 && {field} != 500/2+260", - f"{field} > 400 && {field} < 200", - f"{field} < -2**8", - f"({field} + 1) == 3 || {field} * 2 == 64 || {field} == 10**2" - ] - return expressions +def get_expr_from_template(template={}): + return template.get("expr", None) + + +def get_expr_params_from_template(template={}): + return template.get("expr_params", None) def gen_integer_overflow_expressions(): diff --git a/tests/python_client/common/common_type.py b/tests/python_client/common/common_type.py index 94193fe6fb658..2fbc7edbfb6f6 100644 --- 
a/tests/python_client/common/common_type.py +++ b/tests/python_client/common/common_type.py @@ -207,15 +207,6 @@ {"": ""} ] -get_dict_invalid_host_port = [ - {"port": "port"}, - # ["host", "port"], - # ("host", "port"), - {"host": -1}, - {"port": ["192.168.1.1"]}, - {"port": "-1", "host": "hostlocal"}, -] - get_wrong_format_dict = [ {"host": "string_host", "port": {}}, {"host": 0, "port": 19520} diff --git a/tests/python_client/milvus_client/test_milvus_client_alias.py b/tests/python_client/milvus_client/test_milvus_client_alias.py index 686ecc3ddb98d..3690e6bbe87f4 100644 --- a/tests/python_client/milvus_client/test_milvus_client_alias.py +++ b/tests/python_client/milvus_client/test_milvus_client_alias.py @@ -92,9 +92,7 @@ def test_milvus_client_create_alias_collection_name_over_max_length(self): alias = cf.gen_unique_str("collection_alias") collection_name = "a".join("a" for i in range(256)) # 2. create alias - error = {ct.err_code: 1100, ct.err_msg: f"invalid dimension: {collection_name}. " - f"the length of a collection name must be less than 255 characters: " - f"invalid parameter"} + error = {ct.err_code: 1100, ct.err_msg: f"the length of a collection name must be less than 255 characters"} client_w.create_alias(client, collection_name, alias, check_task=CheckTasks.err_res, check_items=error) @@ -109,8 +107,7 @@ def test_milvus_client_create_alias_not_exist_collection(self): client = self._connect(enable_milvus_client_api=True) alias = cf.gen_unique_str("collection_alias") collection_name = "not_exist_collection_alias" - error = {ct.err_code: 100, ct.err_msg: f"can't find collection collection not " - f"found[database=default][collection={collection_name}]"} + error = {ct.err_code: 100, ct.err_msg: f"collection not found[database=default][collection={collection_name}]"} client_w.create_alias(client, collection_name, alias, check_task=CheckTasks.err_res, check_items=error) @@ -128,8 +125,9 @@ def test_milvus_client_create_alias_invalid_alias_name(self, alias): # 1. create collection client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong") # 2. create alias - error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {collection_name}. the first character of a " - f"collection name must be an underscore or letter: invalid parameter"} + error = {ct.err_code: 1100, + ct.err_msg: f"Invalid collection alias: {alias}. " + f"the first character of a collection alias must be an underscore or letter"} client_w.create_alias(client, collection_name, alias, check_task=CheckTasks.err_res, check_items=error) client_w.drop_collection(client, collection_name) @@ -148,9 +146,7 @@ def test_milvus_client_create_alias_name_over_max_length(self): # 1. create collection client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong") # 2. create alias - error = {ct.err_code: 1100, ct.err_msg: f"invalid dimension: {collection_name}. 
" - f"the length of a collection name must be less than 255 characters: " - f"invalid parameter"} + error = {ct.err_code: 1100, ct.err_msg: f"the length of a collection alias must be less than 255 characters"} client_w.create_alias(client, collection_name, alias, check_task=CheckTasks.err_res, check_items=error) client_w.drop_collection(client, collection_name) @@ -220,8 +216,8 @@ def test_milvus_client_drop_alias_invalid_alias_name(self, alias_name): expected: create alias successfully """ client = self._connect(enable_milvus_client_api=True) - error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {alias_name}. the first character of a " - f"collection name must be an underscore or letter: invalid parameter"} + error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection alias: {alias_name}. the first character of a " + f"collection alias must be an underscore or letter"} client_w.drop_alias(client, alias_name, check_task=CheckTasks.err_res, check_items=error) @@ -235,9 +231,7 @@ def test_milvus_client_drop_alias_over_max_length(self): """ client = self._connect(enable_milvus_client_api=True) alias = "a".join("a" for i in range(256)) - error = {ct.err_code: 1100, ct.err_msg: f"invalid dimension: {alias}. " - f"the length of a collection name must be less than 255 characters: " - f"invalid parameter"} + error = {ct.err_code: 1100, ct.err_msg: f"the length of a collection alias must be less than 255 characters"} client_w.drop_alias(client, alias, check_task=CheckTasks.err_res, check_items=error) @@ -269,9 +263,7 @@ def test_milvus_client_alter_alias_collection_name_over_max_length(self): alias = cf.gen_unique_str("collection_alias") collection_name = "a".join("a" for i in range(256)) # 2. create alias - error = {ct.err_code: 1100, ct.err_msg: f"invalid dimension: {collection_name}. " - f"the length of a collection name must be less than 255 characters: " - f"invalid parameter"} + error = {ct.err_code: 1100, ct.err_msg: f"the length of a collection name must be less than 255 characters"} client_w.alter_alias(client, collection_name, alias, check_task=CheckTasks.err_res, check_items=error) @@ -287,8 +279,7 @@ def test_milvus_client_alter_alias_not_exist_collection(self): alias = cf.gen_unique_str("collection_alias") collection_name = cf.gen_unique_str("not_exist_collection_alias") # 2. create alias - error = {ct.err_code: 100, ct.err_msg: f"can't find collection collection not " - f"found[database=default][collection={collection_name}]"} + error = {ct.err_code: 100, ct.err_msg: f"collection not found[collection={collection_name}]"} client_w.alter_alias(client, collection_name, alias, check_task=CheckTasks.err_res, check_items=error) client_w.drop_collection(client, collection_name) @@ -307,10 +298,10 @@ def test_milvus_client_alter_alias_invalid_alias_name(self, alias): # 1. create collection client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong") # 2. create alias - error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {collection_name}. the first character of a " - f"collection name must be an underscore or letter: invalid parameter"} + error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection alias: {alias}. 
the first character of a " + f"collection alias must be an underscore or letter"} client_w.alter_alias(client, collection_name, alias, - check_task=CheckTasks.err_res, check_items=error) + check_task=CheckTasks.err_res, check_items=error) client_w.drop_collection(client, collection_name) @pytest.mark.tags(CaseLabel.L1) @@ -327,11 +318,9 @@ def test_milvus_client_alter_alias_name_over_max_length(self): # 1. create collection client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong") # 2. create alias - error = {ct.err_code: 1100, ct.err_msg: f"invalid dimension: {collection_name}. " - f"the length of a collection name must be less than 255 characters: " - f"invalid parameter"} + error = {ct.err_code: 1100, ct.err_msg: f"the length of a collection alias must be less than 255 characters"} client_w.alter_alias(client, collection_name, alias, - check_task=CheckTasks.err_res, check_items=error) + check_task=CheckTasks.err_res, check_items=error) client_w.drop_collection(client, collection_name) @pytest.mark.tags(CaseLabel.L1) @@ -362,15 +351,15 @@ def test_milvus_client_alter_non_exists_alias(self): expected: alter alias successfully """ client = self._connect(enable_milvus_client_api=True) - collection_name = cf.gen_unique_str(prefix) - alias = cf.gen_unique_str("collection_alias") - another_alias = cf.gen_unique_str("collection_alias_another") + collection_name = cf.gen_unique_str("coll") + alias = cf.gen_unique_str("alias") + another_alias = cf.gen_unique_str("another_alias") # 1. create collection client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong") # 2. create alias client_w.create_alias(client, collection_name, alias) # 3. alter alias - error = {ct.err_code: 1600, ct.err_msg: f"alias not found[database=default][alias={collection_name}]"} + error = {ct.err_code: 1600, ct.err_msg: f"alias not found[database=default][alias={another_alias}]"} client_w.alter_alias(client, collection_name, another_alias, check_task=CheckTasks.err_res, check_items=error) client_w.drop_collection(client, collection_name) diff --git a/tests/python_client/milvus_client/test_milvus_client_collection.py b/tests/python_client/milvus_client/test_milvus_client_collection.py index 43d71de2c8e4c..8ef66153942b4 100644 --- a/tests/python_client/milvus_client/test_milvus_client_collection.py +++ b/tests/python_client/milvus_client/test_milvus_client_collection.py @@ -89,9 +89,7 @@ def test_milvus_client_collection_name_over_max_length(self): client = self._connect(enable_milvus_client_api=True) # 1. create collection collection_name = "a".join("a" for i in range(256)) - error = {ct.err_code: 1100, ct.err_msg: f"invalid dimension: {collection_name}. " - f"the length of a collection name must be less than 255 characters: " - f"invalid parameter"} + error = {ct.err_code: 1100, ct.err_msg: f"the length of a collection name must be less than 255 characters"} client_w.create_collection(client, collection_name, default_dim, check_task=CheckTasks.err_res, check_items=error) @@ -120,7 +118,11 @@ def test_milvus_client_collection_invalid_dim(self, dim): client = self._connect(enable_milvus_client_api=True) collection_name = cf.gen_unique_str(prefix) # 1. create collection - error = {ct.err_code: 65535, ct.err_msg: f"invalid dimension: {dim}. should be in range 2 ~ 32768"} + error = {ct.err_code: 65535, ct.err_msg: f"invalid dimension: {dim}. 
" + f"float vector dimension should be in range 2 ~ 32768"} + if dim < ct.min_dim: + error = {ct.err_code: 65535, ct.err_msg: f"invalid dimension: {dim}. " + f"should be in range 2 ~ 32768"} client_w.create_collection(client, collection_name, dim, check_task=CheckTasks.err_res, check_items=error) client_w.drop_collection(client, collection_name) @@ -625,13 +627,11 @@ def test_milvus_client_collection_rename_collection_target_db(self): class TestMilvusClientDropCollectionInvalid(TestcaseBase): """ Test case of drop collection interface """ - """ ****************************************************************** # The following are invalid base cases ****************************************************************** """ - @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("name", ["12-s", "12 s", "(mn)", "中文", "%$#"]) def test_milvus_client_drop_collection_invalid_collection_name(self, name): @@ -641,8 +641,8 @@ def test_milvus_client_drop_collection_invalid_collection_name(self, name): expected: create collection with default schema, index, and load successfully """ client = self._connect(enable_milvus_client_api=True) - error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {name}. collection name can only " - f"contain numbers, letters and underscores: invalid parameter"} + error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {name}. " + f"the first character of a collection name must be an underscore or letter"} client_w.drop_collection(client, name, check_task=CheckTasks.err_res, check_items=error) @@ -660,13 +660,11 @@ def test_milvus_client_drop_collection_not_existed(self): class TestMilvusClientReleaseCollectionInvalid(TestcaseBase): """ Test case of release collection interface """ - """ ****************************************************************** # The following are invalid base cases ****************************************************************** """ - @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("name", ["12-s", "12 s", "(mn)", "中文", "%$#"]) def test_milvus_client_release_collection_invalid_collection_name(self, name): @@ -676,8 +674,9 @@ def test_milvus_client_release_collection_invalid_collection_name(self, name): expected: create collection with default schema, index, and load successfully """ client = self._connect(enable_milvus_client_api=True) - error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {name}. collection name can only " - f"contain numbers, letters and underscores: invalid parameter"} + error = {ct.err_code: 1100, + ct.err_msg: f"Invalid collection name: {name}. " + f"the first character of a collection name must be an underscore or letter"} client_w.release_collection(client, name, check_task=CheckTasks.err_res, check_items=error) @@ -705,9 +704,7 @@ def test_milvus_client_release_collection_name_over_max_length(self): client = self._connect(enable_milvus_client_api=True) # 1. create collection collection_name = "a".join("a" for i in range(256)) - error = {ct.err_code: 1100, ct.err_msg: f"invalid dimension: {collection_name}. 
" - f"the length of a collection name must be less than 255 characters: " - f"invalid parameter"} + error = {ct.err_code: 1100, ct.err_msg: f"the length of a collection name must be less than 255 characters"} client_w.release_collection(client, collection_name, default_dim, check_task=CheckTasks.err_res, check_items=error) @@ -773,13 +770,11 @@ def test_milvus_client_load_partially_loaded_collection(self): class TestMilvusClientLoadCollectionInvalid(TestcaseBase): """ Test case of search interface """ - """ ****************************************************************** # The following are invalid base cases ****************************************************************** """ - @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("name", ["12-s", "12 s", "(mn)", "中文", "%$#"]) def test_milvus_client_load_collection_invalid_collection_name(self, name): @@ -789,8 +784,9 @@ def test_milvus_client_load_collection_invalid_collection_name(self, name): expected: create collection with default schema, index, and load successfully """ client = self._connect(enable_milvus_client_api=True) - error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {name}. collection name can only " - f"contain numbers, letters and underscores: invalid parameter"} + error = {ct.err_code: 1100, + ct.err_msg: f"Invalid collection name: {name}. " + f"the first character of a collection name must be an underscore or letter"} client_w.load_collection(client, name, check_task=CheckTasks.err_res, check_items=error) @@ -904,13 +900,11 @@ def test_milvus_client_load_partially_loaded_collection(self): class TestMilvusClientDescribeCollectionInvalid(TestcaseBase): """ Test case of search interface """ - """ ****************************************************************** # The following are invalid base cases ****************************************************************** """ - @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("name", ["12-s", "12 s", "(mn)", "中文", "%$#"]) def test_milvus_client_describe_collection_invalid_collection_name(self, name): @@ -920,8 +914,9 @@ def test_milvus_client_describe_collection_invalid_collection_name(self, name): expected: create collection with default schema, index, and load successfully """ client = self._connect(enable_milvus_client_api=True) - error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {name}. collection name can only " - f"contain numbers, letters and underscores: invalid parameter"} + error = {ct.err_code: 1100, + ct.err_msg: f"Invalid collection name: {name}. 
" + f"the first character of a collection name must be an underscore or letter"} client_w.describe_collection(client, name, check_task=CheckTasks.err_res, check_items=error) @@ -959,13 +954,11 @@ def test_milvus_client_describe_collection_deleted_collection(self): class TestMilvusClientHasCollectionInvalid(TestcaseBase): """ Test case of search interface """ - """ ****************************************************************** # The following are invalid base cases ****************************************************************** """ - @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("name", ["12-s", "12 s", "(mn)", "中文", "%$#"]) def test_milvus_client_has_collection_invalid_collection_name(self, name): @@ -975,8 +968,9 @@ def test_milvus_client_has_collection_invalid_collection_name(self, name): expected: create collection with default schema, index, and load successfully """ client = self._connect(enable_milvus_client_api=True) - error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {name}. collection name can only " - f"contain numbers, letters and underscores: invalid parameter"} + error = {ct.err_code: 1100, + ct.err_msg: f"Invalid collection name: {name}. " + f"the first character of a collection name must be an underscore or letter"} client_w.has_collection(client, name, check_task=CheckTasks.err_res, check_items=error) diff --git a/tests/python_client/milvus_client/test_milvus_client_index.py b/tests/python_client/milvus_client/test_milvus_client_index.py index 081d3a8bb27b4..ac718528484e5 100644 --- a/tests/python_client/milvus_client/test_milvus_client_index.py +++ b/tests/python_client/milvus_client/test_milvus_client_index.py @@ -128,10 +128,9 @@ def test_milvus_client_index_not_exist_collection_name(self): client_w.drop_index(client, collection_name, "vector") # 2. prepare index params index_params = client_w.prepare_index_params(client)[0] - index_params.add_index(field_name = "vector") + index_params.add_index(field_name="vector") # 3. create index - error = {ct.err_code: 100, ct.err_msg: f"can't find collection collection not " - f"found[database=default][collection=not_existed]"} + error = {ct.err_code: 100, ct.err_msg: f"can't find collection[database=default][collection={not_existed_collection_name}]"} client_w.create_index(client, not_existed_collection_name, index_params, check_task=CheckTasks.err_res, check_items=error) client_w.drop_collection(client, collection_name) diff --git a/tests/python_client/milvus_client/test_milvus_client_insert.py b/tests/python_client/milvus_client/test_milvus_client_insert.py index 867a89eb64772..ce93aba311a93 100644 --- a/tests/python_client/milvus_client/test_milvus_client_insert.py +++ b/tests/python_client/milvus_client/test_milvus_client_insert.py @@ -128,9 +128,7 @@ def test_milvus_client_insert_collection_name_over_max_length(self): rng = np.random.default_rng(seed=19530) rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 1100, ct.err_msg: f"invalid dimension: {collection_name}. 
" - f"the length of a collection name must be less than 255 characters: " - f"invalid parameter"} + error = {ct.err_code: 1100, ct.err_msg: f"the length of a collection name must be less than 255 characters"} client_w.insert(client, collection_name, rows, check_task=CheckTasks.err_res, check_items=error) @@ -146,8 +144,7 @@ def test_milvus_client_insert_not_exist_collection_name(self): rng = np.random.default_rng(seed=19530) rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 100, ct.err_msg: f"can't find collection collection not found" - f"[database=default][collection={collection_name}]"} + error = {ct.err_code: 100, ct.err_msg: f"can't find collection[database=default][collection={collection_name}]"} client_w.insert(client, collection_name, rows, check_task=CheckTasks.err_res, check_items=error) @@ -184,8 +181,9 @@ def test_milvus_client_insert_data_vector_field_missing(self): rng = np.random.default_rng(seed=19530) rows = [{default_primary_key_field_name: i, default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 1, ct.err_msg: f"float vector field 'vector' is illegal, array type mismatch: " - f"invalid parameter[expected=need float vector][actual=got nil]"} + error = {ct.err_code: 1, + ct.err_msg: f"Insert missed an field `vector` to collection " + f"without set nullable==true or set default_value"} client_w.insert(client, collection_name, data=rows, check_task=CheckTasks.err_res, check_items=error) @@ -204,7 +202,8 @@ def test_milvus_client_insert_data_id_field_missing(self): rng = np.random.default_rng(seed=19530) rows = [{default_vector_field_name: list(rng.random((1, default_dim))[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 1, ct.err_msg: f"currently not support vector field as PrimaryField: invalid parameter"} + error = {ct.err_code: 1, + ct.err_msg: f"Insert missed an field `id` to collection without set nullable==true or set default_value"} client_w.insert(client, collection_name, data=rows, check_task=CheckTasks.err_res, check_items=error) @@ -223,9 +222,9 @@ def test_milvus_client_insert_data_extra_field(self): rng = np.random.default_rng(seed=19530) rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 1, ct.err_msg: f"Attempt to insert an unexpected field " - f"to collection without enabling dynamic field"} - client_w.insert(client, collection_name, data= rows, + error = {ct.err_code: 1, + ct.err_msg: f"Attempt to insert an unexpected field `float` to collection without enabling dynamic field"} + client_w.insert(client, collection_name, data=rows, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @@ -262,9 +261,10 @@ def test_milvus_client_insert_not_matched_data(self): rng = np.random.default_rng(seed=19530) rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, default_dim))[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 1, ct.err_msg: f"The Input data type is inconsistent with defined schema, " - f"please check it."} - 
client_w.insert(client, collection_name, data= rows, + error = {ct.err_code: 1, + ct.err_msg: f"The Input data type is inconsistent with defined schema, " + f"{{id}} field should be a int64"} + client_w.insert(client, collection_name, data=rows, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @@ -283,9 +283,10 @@ def test_milvus_client_insert_invalid_partition_name(self, partition_name): rng = np.random.default_rng(seed=19530) rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}. The first character of " - f"a partition name must be an underscore or letter."} - client_w.insert(client, collection_name, data= rows, partition_name=partition_name, + error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}."} + if partition_name == " ": + error = {ct.err_code: 1, ct.err_msg: f"Invalid partition name: . Partition name should not be empty."} + client_w.insert(client, collection_name, data=rows, partition_name=partition_name, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @@ -584,9 +585,7 @@ def test_milvus_client_upsert_collection_name_over_max_length(self): rng = np.random.default_rng(seed=19530) rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 1100, ct.err_msg: f"invalid dimension: {collection_name}. " - f"the length of a collection name must be less than 255 characters: " - f"invalid parameter"} + error = {ct.err_code: 1100, ct.err_msg: f"the length of a collection name must be less than 255 characters"} client_w.upsert(client, collection_name, rows, check_task=CheckTasks.err_res, check_items=error) @@ -602,13 +601,11 @@ def test_milvus_client_upsert_not_exist_collection_name(self): rng = np.random.default_rng(seed=19530) rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 100, ct.err_msg: f"can't find collection collection not found" - f"[database=default][collection={collection_name}]"} + error = {ct.err_code: 100, ct.err_msg: f"can't find collection[database=default][collection={collection_name}]"} client_w.upsert(client, collection_name, rows, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.xfail(reason="pymilvus issue 1894") @pytest.mark.parametrize("data", ["12-s", "12 s", "(mn)", "中文", "%$#", " "]) def test_milvus_client_upsert_data_invalid_type(self, data): """ @@ -621,12 +618,11 @@ def test_milvus_client_upsert_data_invalid_type(self, data): # 1. create collection client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong") # 2. 
insert - error = {ct.err_code: 1, ct.err_msg: f"None rows, please provide valid row data."} + error = {ct.err_code: 1, ct.err_msg: f"wrong type of argument 'data',expected 'Dict' or list of 'Dict'"} client_w.upsert(client, collection_name, data, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.xfail(reason="pymilvus issue 1895") def test_milvus_client_upsert_data_empty(self): """ target: test high level api: client.create_collection @@ -638,8 +634,9 @@ def test_milvus_client_upsert_data_empty(self): # 1. create collection client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong") # 2. insert - error = {ct.err_code: 1, ct.err_msg: f"None rows, please provide valid row data."} - client_w.upsert(client, collection_name, data= "") + error = {ct.err_code: 1, ct.err_msg: f"wrong type of argument 'data',expected 'Dict' or list of 'Dict'"} + client_w.upsert(client, collection_name, data="", + check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_milvus_client_upsert_data_vector_field_missing(self): @@ -655,9 +652,9 @@ def test_milvus_client_upsert_data_vector_field_missing(self): # 2. insert rng = np.random.default_rng(seed=19530) rows = [{default_primary_key_field_name: i, - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 1, ct.err_msg: f"float vector field 'vector' is illegal, array type mismatch: " - f"invalid parameter[expected=need float vector][actual=got nil]"} + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(10)] + error = {ct.err_code: 1, + ct.err_msg: "Insert missed an field `vector` to collection without set nullable==true or set default_value"} client_w.upsert(client, collection_name, data=rows, check_task=CheckTasks.err_res, check_items=error) @@ -675,9 +672,10 @@ def test_milvus_client_upsert_data_id_field_missing(self): # 2. insert rng = np.random.default_rng(seed=19530) rows = [{default_vector_field_name: list(rng.random((1, default_dim))[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 1, ct.err_msg: f"currently not support vector field as PrimaryField: invalid parameter"} - client_w.upsert(client, collection_name, data= rows, + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(20)] + error = {ct.err_code: 1, + ct.err_msg: f"Insert missed an field `id` to collection without set nullable==true or set default_value"} + client_w.upsert(client, collection_name, data=rows, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @@ -690,14 +688,15 @@ def test_milvus_client_upsert_data_extra_field(self): client = self._connect(enable_milvus_client_api=True) collection_name = cf.gen_unique_str(prefix) # 1. create collection - client_w.create_collection(client, collection_name, default_dim, enable_dynamic_field=False) + dim= 32 + client_w.create_collection(client, collection_name, dim, enable_dynamic_field=False) # 2. 
insert rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 1, ct.err_msg: f"Attempt to insert an unexpected field " - f"to collection without enabling dynamic field"} - client_w.upsert(client, collection_name, data= rows, + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, dim))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(10)] + error = {ct.err_code: 1, + ct.err_msg: f"Attempt to insert an unexpected field `float` to collection without enabling dynamic field"} + client_w.upsert(client, collection_name, data=rows, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @@ -734,8 +733,8 @@ def test_milvus_client_upsert_not_matched_data(self): rng = np.random.default_rng(seed=19530) rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, default_dim))[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 1, ct.err_msg: f"The Input data type is inconsistent with defined schema, " - f"please check it."} + error = {ct.err_code: 1, + ct.err_msg: "The Input data type is inconsistent with defined schema, {id} field should be a int64"} client_w.upsert(client, collection_name, data= rows, check_task=CheckTasks.err_res, check_items=error) @@ -755,8 +754,9 @@ def test_milvus_client_upsert_invalid_partition_name(self, partition_name): rng = np.random.default_rng(seed=19530) rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}. The first character of " - f"a partition name must be an underscore or letter."} + error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}"} + if partition_name == " ": + error = {ct.err_code: 1, ct.err_msg: f"Invalid partition name: . Partition name should not be empty."} client_w.upsert(client, collection_name, data= rows, partition_name=partition_name, check_task=CheckTasks.err_res, check_items=error) diff --git a/tests/python_client/milvus_client/test_milvus_client_partition.py b/tests/python_client/milvus_client/test_milvus_client_partition.py index 0ab2359215059..21cc32dc5c0ec 100644 --- a/tests/python_client/milvus_client/test_milvus_client_partition.py +++ b/tests/python_client/milvus_client/test_milvus_client_partition.py @@ -125,8 +125,7 @@ def test_milvus_client_partition_invalid_partition_name(self, partition_name): collection_name = cf.gen_unique_str(prefix) # 2. create partition client_w.create_collection(client, collection_name, default_dim) - error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}. The first character of a " - f"partition name must be an underscore or letter.]"} + error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}"} client_w.create_partition(client, collection_name, partition_name, check_task=CheckTasks.err_res, check_items=error) @@ -396,8 +395,7 @@ def test_milvus_client_drop_partition_invalid_partition_name(self, partition_nam collection_name = cf.gen_unique_str(prefix) # 2. 
create partition client_w.create_collection(client, collection_name, default_dim) - error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}. The first character of a " - f"partition name must be an underscore or letter.]"} + error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}."} client_w.drop_partition(client, collection_name, partition_name, check_task=CheckTasks.err_res, check_items=error) @@ -822,8 +820,7 @@ def test_milvus_client_has_partition_invalid_partition_name(self, partition_name collection_name = cf.gen_unique_str(prefix) # 2. create partition client_w.create_collection(client, collection_name, default_dim) - error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}. The first character of a " - f"partition name must be an underscore or letter.]"} + error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}"} client_w.has_partition(client, collection_name, partition_name, check_task=CheckTasks.err_res, check_items=error) @@ -839,8 +836,8 @@ def test_milvus_client_has_partition_name_over_max_length(self): partition_name = "a".join("a" for i in range(256)) # 2. create partition client_w.create_collection(client, collection_name, default_dim) - error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}. the length of a collection name " - f"must be less than 255 characters: invalid parameter"} + error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}. " + f"The length of a partition name must be less than 255 characters"} client_w.has_partition(client, collection_name, partition_name, check_task=CheckTasks.err_res, check_items=error) diff --git a/tests/python_client/testcases/test_alias.py b/tests/python_client/testcases/test_alias.py index 41322ac311dd7..b06d165f2f8aa 100644 --- a/tests/python_client/testcases/test_alias.py +++ b/tests/python_client/testcases/test_alias.py @@ -297,15 +297,14 @@ def test_alias_create_duplication_alias(self): check_items={exp_name: c_1_name, exp_schema: default_schema}) alias_a_name = cf.gen_unique_str(prefix) self.utility_wrap.create_alias(collection_1.name, alias_a_name) - # collection_1.create_alias(alias_a_name) c_2_name = cf.gen_unique_str("collection") collection_2 = self.init_collection_wrap(name=c_2_name, schema=default_schema, check_task=CheckTasks.check_collection_property, check_items={exp_name: c_2_name, exp_schema: default_schema}) error = {ct.err_code: 1602, - ct.err_msg: f"alias exists and already aliased to another collection, alias: {alias_a_name}, " - f"collection: {c_1_name}, other collection: {c_2_name}"} + ct.err_msg: f"{alias_a_name} is alias to another collection: {collection_1.name}: " + f"alias already exist[database=default][alias={alias_a_name}]"} self.utility_wrap.create_alias(collection_2.name, alias_a_name, check_task=CheckTasks.err_res, check_items=error) @@ -330,7 +329,7 @@ def test_alias_alter_not_exist_alias(self): alias_not_exist_name = cf.gen_unique_str(prefix) error = {ct.err_code: 1600, - ct.err_msg: "Alter alias failed: alias does not exist"} + ct.err_msg: f"alias not found[database=default][alias={alias_not_exist_name}]"} self.utility_wrap.alter_alias(collection_w.name, alias_not_exist_name, check_task=CheckTasks.err_res, check_items=error) diff --git a/tests/python_client/testcases/test_collection.py b/tests/python_client/testcases/test_collection.py index 02f6eab1c4798..a40949c922d81 100644 --- a/tests/python_client/testcases/test_collection.py 
+++ b/tests/python_client/testcases/test_collection.py @@ -270,7 +270,8 @@ def test_collection_none_schema(self): """ self._connect() c_name = cf.gen_unique_str(prefix) - error = {ct.err_code: 1, ct.err_msg: "Collection '%s' not exist, or you can pass in schema to create one."} + error = {ct.err_code: 999, + ct.err_msg: f"Collection '{c_name}' not exist, or you can pass in schema to create one."} self.collection_wrap.init_collection(c_name, schema=None, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -762,7 +763,7 @@ def test_collection_none_desc(self): self._connect() c_name = cf.gen_unique_str(prefix) schema = cf.gen_default_collection_schema(description=None) - error = {ct.err_code: 1, ct.err_msg: "None has type NoneType, but expected one of: bytes, unicode"} + error = {ct.err_code: 1, ct.err_msg: "description [None] has type NoneType, but expected one of: bytes, str"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -847,7 +848,7 @@ def test_collection_shards_num_with_error_type(self, error_type_shards_num): """ self._connect() c_name = cf.gen_unique_str(prefix) - error = {ct.err_code: 1, ct.err_msg: f"expected one of: int, long"} + error = {ct.err_code: 999, ct.err_msg: f"invalid num_shards type"} self.collection_wrap.init_collection(c_name, schema=default_schema, shards_num=error_type_shards_num, check_task=CheckTasks.err_res, check_items=error) @@ -1086,7 +1087,7 @@ def test_collection_without_connection(self): self.connection_wrap.remove_connection(ct.default_alias) res_list, _ = self.connection_wrap.list_connections() assert ct.default_alias not in res_list - error = {ct.err_code: 1, ct.err_msg: 'should create connect first'} + error = {ct.err_code: 1, ct.err_msg: 'should create connection first'} self.collection_wrap.init_collection(c_name, schema=default_schema, check_task=CheckTasks.err_res, check_items=error) assert self.collection_wrap.collection is None @@ -1261,7 +1262,7 @@ def test_construct_from_none_dataframe(self): """ self._connect() c_name = cf.gen_unique_str(prefix) - error = {ct.err_code: 1, ct.err_msg: "Dataframe can not be None."} + error = {ct.err_code: 999, ct.err_msg: "Data type must be pandas.DataFrame"} self.collection_wrap.construct_from_dataframe(c_name, None, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -1291,7 +1292,8 @@ def test_construct_from_inconsistent_dataframe(self): mix_data = [(1, 2., [0.1, 0.2]), (2, 3., 4)] df = pd.DataFrame(data=mix_data, columns=list("ABC")) error = {ct.err_code: 1, - ct.err_msg: "The Input data type is inconsistent with defined schema, please check it."} + ct.err_msg: "The Input data type is inconsistent with defined schema, " + "{C} field should be a float_vector, but got a {} instead."} self.collection_wrap.construct_from_dataframe(c_name, df, primary_field='A', check_task=CheckTasks.err_res, check_items=error) @@ -1965,7 +1967,7 @@ def test_drop_collection_without_connection(self): self.connection_wrap.remove_connection(ct.default_alias) res_list, _ = self.connection_wrap.list_connections() assert ct.default_alias not in res_list - error = {ct.err_code: 1, ct.err_msg: 'should create connect first'} + error = {ct.err_code: 1, ct.err_msg: 'should create connection first'} collection_wr.drop(check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @@ -2066,7 +2068,7 @@ def test_has_collection_without_connection(self): 
self.connection_wrap.remove_connection(ct.default_alias) res_list, _ = self.connection_wrap.list_connections() assert ct.default_alias not in res_list - error = {ct.err_code: 1, ct.err_msg: 'should create connect first'} + error = {ct.err_code: 1, ct.err_msg: 'should create connection first'} self.utility_wrap.has_collection(c_name, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -2181,7 +2183,7 @@ def test_list_collections_without_connection(self): self.connection_wrap.remove_connection(ct.default_alias) res_list, _ = self.connection_wrap.list_connections() assert ct.default_alias not in res_list - error = {ct.err_code: 1, ct.err_msg: 'should create connect first'} + error = {ct.err_code: 999, ct.err_msg: 'should create connection first'} self.utility_wrap.list_collections(check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -2274,7 +2276,7 @@ def test_load_collection_dis_connect(self): self.connection_wrap.remove_connection(ct.default_alias) res_list, _ = self.connection_wrap.list_connections() assert ct.default_alias not in res_list - error = {ct.err_code: 1, ct.err_msg: 'should create connect first'} + error = {ct.err_code: 1, ct.err_msg: 'should create connection first'} collection_wr.load(check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -2290,7 +2292,7 @@ def test_release_collection_dis_connect(self): self.connection_wrap.remove_connection(ct.default_alias) res_list, _ = self.connection_wrap.list_connections() assert ct.default_alias not in res_list - error = {ct.err_code: 1, ct.err_msg: 'should create connect first'} + error = {ct.err_code: 999, ct.err_msg: 'should create connection first'} collection_wr.release(check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -2304,8 +2306,8 @@ def test_load_collection_not_existed(self): c_name = cf.gen_unique_str() collection_wr = self.init_collection_wrap(name=c_name) collection_wr.drop() - error = {ct.err_code: 100, - ct.err_msg: "collection= : collection not found"} + error = {ct.err_code: 999, + ct.err_msg: "collection not found"} collection_wr.load(check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -2319,8 +2321,8 @@ def test_release_collection_not_existed(self): c_name = cf.gen_unique_str() collection_wr = self.init_collection_wrap(name=c_name) collection_wr.drop() - error = {ct.err_code: 100, - ct.err_msg: "collection= : collection not found"} + error = {ct.err_code: 999, + ct.err_msg: "collection not found"} collection_wr.release(check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -2801,8 +2803,8 @@ def test_load_replica_greater_than_querynodes(self): assert collection_w.num_entities == ct.default_nb collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index) - error = {ct.err_code: 65535, - ct.err_msg: "failed to load collection: failed to spawn replica for collection: nodes not enough"} + error = {ct.err_code: 999, + ct.err_msg: "failed to spawn replica for collection: resource group node not enough"} collection_w.load(replica_number=3, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.ClusterOnly) @@ -3315,7 +3317,7 @@ def test_load_partition_dis_connect(self): self.connection_wrap.remove_connection(ct.default_alias) res_list, _ = self.connection_wrap.list_connections() assert ct.default_alias not in res_list - error = {ct.err_code: 1, ct.err_msg: 'should create connect 
first.'} + error = {ct.err_code: 999, ct.err_msg: 'should create connection first.'} partition_w.load(check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -3340,7 +3342,7 @@ def test_release_partition_dis_connect(self): self.connection_wrap.remove_connection(ct.default_alias) res_list, _ = self.connection_wrap.list_connections() assert ct.default_alias not in res_list - error = {ct.err_code: 1, ct.err_msg: 'should create connect first.'} + error = {ct.err_code: 1, ct.err_msg: 'should create connection first.'} partition_w.release(check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -3956,7 +3958,7 @@ def test_collection_json_field_partition_key(self, primary_field): """ self._connect() cf.gen_unique_str(prefix) - error = {ct.err_code: 1, ct.err_msg: "Partition key field type must be DataType.INT64 or DataType.VARCHAR."} + error = {ct.err_code: 1, ct.err_msg: "Primary key type must be DataType.INT64 or DataType.VARCHAR"} cf.gen_json_default_collection_schema(primary_field=primary_field, is_partition_key=True, check_task=CheckTasks.err_res, check_items=error) @@ -4015,7 +4017,6 @@ def test_collection_array_field_element_type_not_exist(self): check_items={ct.err_code: 65535, ct.err_msg: "element data type None is not valid"}) @pytest.mark.tags(CaseLabel.L2) - # @pytest.mark.skip("issue #27522") @pytest.mark.parametrize("element_type", [1001, 'a', [], (), {1}, DataType.BINARY_VECTOR, DataType.FLOAT_VECTOR, DataType.JSON, DataType.ARRAY]) def test_collection_array_field_element_type_invalid(self, element_type): @@ -4030,9 +4031,20 @@ def test_collection_array_field_element_type_invalid(self, element_type): vec_field = cf.gen_float_vec_field() array_field = cf.gen_array_field(element_type=element_type) array_schema = cf.gen_collection_schema([int_field, vec_field, array_field]) - error = {ct.err_code: 65535, ct.err_msg: "element data type None is not valid"} + error = {ct.err_code: 999, ct.err_msg: f"element type {element_type} is not supported"} if element_type in ['a', {1}]: - error = {ct.err_code: 1, ct.err_msg: "Unexpected error"} + error = {ct.err_code: 999, ct.err_msg: "Unexpected error"} + if element_type in [[], ()]: + error = {ct.err_code: 65535, ct.err_msg: "element data type None is not valid"} + if element_type in [DataType.BINARY_VECTOR, DataType.FLOAT_VECTOR, DataType.JSON, DataType.ARRAY]: + data_type = element_type.name + if element_type == DataType.BINARY_VECTOR: + data_type = "BinaryVector" + if element_type == DataType.FLOAT_VECTOR: + data_type = "FloatVector" + if element_type == DataType.ARRAY: + data_type = "Array" + error = {ct.err_code: 999, ct.err_msg: f"element type {data_type} is not supported"} self.init_collection_wrap(schema=array_schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -4391,6 +4403,7 @@ def test_create_collection_multiple_vectors_invalid_all_vector_field_name(self, self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.skip("issue #37543") def test_create_collection_multiple_vectors_invalid_dim(self, get_invalid_dim): """ target: test create collection with multiple vector fields @@ -4645,11 +4658,11 @@ def test_create_collection_default_value_on_vector_field(self, vector_type): int_fields.append(cf.gen_int64_field(is_primary=True)) int_fields.append(cf.gen_float_vec_field(vector_data_type=vector_type, default_value=10)) schema = 
cf.gen_collection_schema(fields=int_fields) - error = {ct.err_code: 1100, ct.err_msg: "default value type mismatches field schema type"} + error = {ct.err_code: 1100, ct.err_msg: f"type not support default_value"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("scalar_type", ["JSON", "ARRAY"]) + @pytest.mark.parametrize("scalar_type", ["JSON", "Array"]) def test_create_collection_default_value_on_not_support_scalar_field(self, scalar_type): """ target: test create collection with set default value on not supported scalar field @@ -4662,12 +4675,13 @@ def test_create_collection_default_value_on_not_support_scalar_field(self, scala # add other vector fields to maximum fields num if scalar_type == "JSON": int_fields.append(cf.gen_json_field(default_value=10)) - if scalar_type == "ARRAY": + if scalar_type == "Array": int_fields.append(cf.gen_array_field(default_value=10)) int_fields.append(cf.gen_int64_field(is_primary=True, default_value=10)) int_fields.append(cf.gen_float_vec_field()) schema = cf.gen_collection_schema(fields=int_fields) - error = {ct.err_code: 1100, ct.err_msg: "default value type mismatches field schema type"} + error = {ct.err_code: 1100, + ct.err_msg: f"type not support default_value, type:{scalar_type}"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @@ -4685,7 +4699,8 @@ def test_create_collection_non_match_default_value(self): int_fields.append(cf.gen_int8_field(default_value=10.0)) int_fields.append(cf.gen_float_vec_field()) schema = cf.gen_collection_schema(fields=int_fields) - error = {ct.err_code: 1100, ct.err_msg: "default value type mismatches field schema type"} + error = {ct.err_code: 1100, + ct.err_msg: "type (Int8) of field (int8) is not equal to the type(DataType_Double) of default_value"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) diff --git a/tests/python_client/testcases/test_compaction.py b/tests/python_client/testcases/test_compaction.py index 72be19fc1ab8c..759eb9462ce47 100644 --- a/tests/python_client/testcases/test_compaction.py +++ b/tests/python_client/testcases/test_compaction.py @@ -31,7 +31,7 @@ def test_compact_without_connection(self): self.connection_wrap.remove_connection(ct.default_alias) res_list, _ = self.connection_wrap.list_connections() assert ct.default_alias not in res_list - error = {ct.err_code: 1, ct.err_msg: "should create connect first"} + error = {ct.err_code: 999, ct.err_msg: "should create connection first"} collection_w.compact(check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) diff --git a/tests/python_client/testcases/test_connection.py b/tests/python_client/testcases/test_connection.py index b04841b3c5f44..ed30df5887327 100644 --- a/tests/python_client/testcases/test_connection.py +++ b/tests/python_client/testcases/test_connection.py @@ -39,17 +39,26 @@ def test_connection_add_connection_kwargs_without_host_port(self, data): ('_kwargs', None)]}) @pytest.mark.tags(ct.CaseLabel.L2) - @pytest.mark.parametrize("data", ct.get_dict_invalid_host_port) - def test_connection_add_connection_kwargs_invalid_host_port(self, data): + def test_connection_add_connection_kwargs_invalid_host_port(self): """ target: test **kwargs of add_connection method: passing invalid value for host and port 
expected: report error """ - # check param of **kwargs + for data in [{"port": "port"}, {"port": ["192.168.1.1"]}]: + self.connection_wrap.add_connection(_kwargs=data, check_task=ct.CheckTasks.err_res, + check_items={ct.err_code: 999, + ct.err_msg: "Type of 'port' must be str or int"}) + for data in [{"host": -1}]: + self.connection_wrap.add_connection(_kwargs=data, check_task=ct.CheckTasks.err_res, + check_items={ct.err_code: 999, + ct.err_msg: "Type of 'host' must be str"}) + + data = {"port": "-1", "host": "hostlocal"} self.connection_wrap.add_connection(_kwargs=data, check_task=ct.CheckTasks.err_res, - check_items={ct.err_code: 1, ct.err_msg: cem.NoHostPort}) + check_items={ct.err_code: 999, + ct.err_msg: "port number -1 out of range, valid range [0, 65535)"}) # get addr of default alias self.connection_wrap.get_connection_addr(alias=DefaultConfig.DEFAULT_USING, check_task=ct.CheckTasks.ccr, @@ -74,7 +83,7 @@ def test_connection_connect_kwargs_param_check(self): # No check for **kwargs self.connection_wrap.connect(alias=DefaultConfig.DEFAULT_USING, host=1, check_task=ct.CheckTasks.err_res, - check_items={ct.err_code: 1, ct.err_msg: cem.NoHostPort}) + check_items={ct.err_code: 999, ct.err_msg: "Type of 'host' must be str"}) @pytest.mark.tags(ct.CaseLabel.L2) @pytest.mark.parametrize("alias", ct.get_not_string) @@ -444,7 +453,8 @@ def test_connection_connect_default_alias_invalid(self, port): # using default alias to create connection, the connection does not exist err_msg = cem.FailConnect % ("host", str(port)) self.connection_wrap.connect(alias=DefaultConfig.DEFAULT_USING, check_task=ct.CheckTasks.err_res, - check_items={ct.err_code: 2, ct.err_msg: err_msg}) + check_items={ct.err_code: 999, + ct.err_msg: "illegal connection params or server unavailable"}) # list all connections and check the response self.connection_wrap.list_connections(check_task=ct.CheckTasks.ccr, @@ -791,7 +801,7 @@ def test_connection_init_collection_connection(self, host, port): # drop collection failed self.collection_wrap.drop(check_task=ct.CheckTasks.err_res, - check_items={ct.err_code: 1, ct.err_msg: "should create connect first"}) + check_items={ct.err_code: 1, ct.err_msg: "should create connection first"}) # successfully created default connection self.connection_wrap.connect(alias=DefaultConfig.DEFAULT_USING, host=host, port=port, @@ -892,7 +902,7 @@ def test_connect_with_invalid_ip(self, host, port): err_msg = "Type of 'host' must be str." self.connection_wrap.connect(alias=DefaultConfig.DEFAULT_USING, host=host, port=port, check_task=ct.CheckTasks.check_value_equal, - check_items={ct.err_code: 1, ct.err_msg: err_msg}) + check_items={ct.err_code: 999, ct.err_msg: err_msg}) class TestConnectPortInvalid(TestcaseBase): @@ -911,7 +921,7 @@ def test_connect_with_invalid_port(self, host, port): err_msg = "Type of 'host' must be str." 
self.connection_wrap.connect(alias=DefaultConfig.DEFAULT_USING, host=host, port=port, check_task=ct.CheckTasks.check_value_equal, - check_items={ct.err_code: 1, ct.err_msg: err_msg}) + check_items={ct.err_code: 999, ct.err_msg: err_msg}) class TestConnectUriInvalid(TestcaseBase): @@ -930,7 +940,8 @@ def test_parameters_with_invalid_protocol(self, host, port, connect_name, protoc uri = "{}://{}:{}".format(protocol, host, port) self.connection_wrap.connect(alias=connect_name, uri=uri, check_task=ct.CheckTasks.err_res, - check_items={ct.err_code: 1}) + check_items={ct.err_code: 999, + ct.err_msg: "needs start with [unix, http, https, tcp] or a local file endswith [.db]"}) @pytest.mark.tags(ct.CaseLabel.L2) @pytest.mark.parametrize("host", ["256.256.256.256", "10.1.0"]) @@ -945,7 +956,8 @@ def test_parameters_with_invalid_host(self, host, port, connect_name, protocol): uri = "{}://{}:{}".format(protocol, host, port) self.connection_wrap.connect(alias=connect_name, uri=uri, check_task=ct.CheckTasks.err_res, - check_items={ct.err_code: 2}) + check_items={ct.err_code: 999, + ct.err_msg: "illegal connection params or server unavailable"}) @pytest.mark.tags(ct.CaseLabel.L2) @pytest.mark.parametrize("port", ["8080", "443", "0", "65534"]) @@ -960,7 +972,8 @@ def test_parameters_with_invalid_port(self, host, port, connect_name, protocol): uri = "{}://{}:{}".format(protocol, host, port) self.connection_wrap.connect(alias=connect_name, uri=uri, check_task=ct.CheckTasks.err_res, - check_items={ct.err_code: 2}) + check_items={ct.err_code: 999, + ct.err_msg: "illegal connection params or server unavailable"}) @pytest.mark.tags(ct.CaseLabel.L2) @pytest.mark.parametrize("host", ["www.google.com"]) @@ -976,7 +989,8 @@ def test_parameters_with_invalid_url(self, host, port, connect_name, protocol): uri = "{}://{}:{}".format(protocol, host, port) self.connection_wrap.connect(alias=connect_name, uri=uri, check_task=ct.CheckTasks.err_res, - check_items={ct.err_code: 2}) + check_items={ct.err_code: 999, + ct.err_msg: "illegal connection params or server unavailable"}) class TestConnectAddressInvalid(TestcaseBase): @@ -994,7 +1008,8 @@ def test_parameters_with_invalid_address(self, host, port, connect_name): """ address = "{}:{}".format(host, port) self.connection_wrap.connect(alias=connect_name, address=address, check_task=ct.CheckTasks.err_res, - check_items={ct.err_code: 2}) + check_items={ct.err_code: 999, + ct.err_msg: "illegal connection params or server unavailable"}) @pytest.mark.tags(ct.CaseLabel.L2) @pytest.mark.parametrize("port", ["100", "65536"]) @@ -1007,7 +1022,8 @@ def test_parameters_with_invalid_address_port(self, host, port, connect_name): """ address = "{}:{}".format(host, port) self.connection_wrap.connect(alias=connect_name, address=address, check_task=ct.CheckTasks.err_res, - check_items={ct.err_code: 2}) + check_items={ct.err_code: 999, + ct.err_msg: "illegal connection params or server unavailable"}) class TestConnectUserPasswordInvalid(TestcaseBase): diff --git a/tests/python_client/testcases/test_delete.py b/tests/python_client/testcases/test_delete.py index 9dcfdb8fade63..7580914d1a1b8 100644 --- a/tests/python_client/testcases/test_delete.py +++ b/tests/python_client/testcases/test_delete.py @@ -95,7 +95,7 @@ def test_delete_without_connection(self): self.connection_wrap.remove_connection(ct.default_alias) res_list, _ = self.connection_wrap.list_connections() assert ct.default_alias not in res_list - error = {ct.err_code: 1, ct.err_msg: "should create connect first"} + error = 
{ct.err_code: 1, ct.err_msg: "should create connection first"} collection_w.delete(expr=tmp_expr, check_task=CheckTasks.err_res, check_items=error) # Not Milvus Exception @@ -108,7 +108,7 @@ def test_delete_expr_none(self): """ # init collection with tmp_nb default data collection_w = self.init_collection_general(prefix, nb=tmp_nb, insert_data=True)[0] - error = {ct.err_code: 1, ct.err_msg: "expr cannot be None"} + error = {ct.err_code: 999, ct.err_msg: "cannot be None"} collection_w.delete(expr=None, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -121,7 +121,7 @@ def test_delete_expr_non_string(self, expr): """ # init collection with tmp_nb default data collection_w = self.init_collection_general(prefix, nb=tmp_nb, insert_data=True)[0] - error = {ct.err_code: 1, ct.err_msg: f"expr value {expr} is illegal"} + error = {ct.err_code: 999, ct.err_msg: f"value {expr} is illegal"} collection_w.delete(expr, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -195,8 +195,7 @@ def test_delete_expr_with_vector(self): is_all_data_type=True, is_index=True)[0] expr = f"{ct.default_float_vec_field_name} in [[0.1]]" error = {ct.err_code: 1100, - ct.err_msg: f"failed to create delete plan: cannot parse expression: {expr}, " - f"error: value '[0.1]' in list cannot be casted to FloatVector: invalid parameter"} + ct.err_msg: f"failed to create delete plan: cannot parse expression: {expr}"} collection_w.delete(expr, check_task=CheckTasks.err_res, check_items=error) @@ -629,8 +628,8 @@ def test_delete_not_existed_partition(self): collection_w = self.init_collection_general(prefix, nb=tmp_nb, insert_data=True)[0] # raise exception - error = {ct.err_code: 200, - ct.err_msg: f"Failed to get partition id: partition={ct.default_tag}: partition not found"} + error = {ct.err_code: 999, + ct.err_msg: f"Failed to get partition id: partition not found[partition={ct.default_tag}]"} collection_w.delete(tmp_expr, partition_name=ct.default_tag, check_task=CheckTasks.err_res, check_items=error) @@ -1934,9 +1933,9 @@ class TestDeleteComplexExpr(TestcaseBase): """ @pytest.mark.tags(CaseLabel.L0) - @pytest.mark.parametrize("expression", cf.gen_normal_expressions()[1:]) + @pytest.mark.parametrize("expressions", cf.gen_normal_expressions_and_templates()[1:]) @pytest.mark.parametrize("enable_dynamic_field", [True, False]) - def test_delete_normal_expressions(self, expression, enable_dynamic_field): + def test_delete_normal_expressions(self, expressions, enable_dynamic_field): """ target: test delete entities using normal expression method: delete using normal expression @@ -1948,7 +1947,7 @@ def test_delete_normal_expressions(self, expression, enable_dynamic_field): # filter result with expression in collection _vectors = _vectors[0] - expression = expression.replace("&&", "and").replace("||", "or") + expression = expressions[0].replace("&&", "and").replace("||", "or") filter_ids = [] for i, _id in enumerate(insert_ids): if enable_dynamic_field: @@ -1967,10 +1966,46 @@ def test_delete_normal_expressions(self, expression, enable_dynamic_field): # query to check collection_w.query(f"int64 in {filter_ids}", check_task=CheckTasks.check_query_empty) + @pytest.mark.tags(CaseLabel.L0) + @pytest.mark.parametrize("expressions", cf.gen_normal_expressions_and_templates()[1:]) + @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + def test_delete_normal_expressions_templates(self, expressions, enable_dynamic_field): + """ + target: test delete entities using 
normal expression + method: delete using normal expression + expected: delete successfully + """ + # init collection with nb default data + collection_w, _vectors, _, insert_ids = \ + self.init_collection_general(prefix, True, enable_dynamic_field=enable_dynamic_field)[0:4] + + # filter result with expression in collection + _vectors = _vectors[0] + expr = expressions[0].replace("&&", "and").replace("||", "or") + filter_ids = [] + for i, _id in enumerate(insert_ids): + if enable_dynamic_field: + int64 = _vectors[i][ct.default_int64_field_name] + float = _vectors[i][ct.default_float_field_name] + else: + int64 = _vectors.int64[i] + float = _vectors.float[i] + if not expr or eval(expr): + filter_ids.append(_id) + + # delete with expressions templates + expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or") + expr_params = cf.get_expr_params_from_template(expressions[1]) + res = collection_w.delete(expr=expr, expr_params=expr_params)[0] + assert res.delete_count == len(filter_ids) + + # query to check + collection_w.query(f"int64 in {filter_ids}", check_task=CheckTasks.check_query_empty) + @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("expression", cf.gen_array_field_expressions()) + @pytest.mark.parametrize("expressions", cf.gen_array_field_expressions_and_templates()) @pytest.mark.parametrize("enable_dynamic_field", [True, False]) - def test_delete_array_expressions(self, expression, enable_dynamic_field): + def test_delete_array_expressions(self, expressions, enable_dynamic_field): """ target: test delete entities using normal expression method: delete using normal expression @@ -1993,25 +2028,73 @@ def test_delete_array_expressions(self, expression, enable_dynamic_field): data.append(arr) collection_w.insert(data) collection_w.flush() + collection_w.create_index(ct.default_float_vec_field_name, ct.default_flat_index) + collection_w.load() # 3. filter result with expression in collection - expression = expression.replace("&&", "and").replace("||", "or") + expr = expressions[0].replace("&&", "and").replace("||", "or") filter_ids = [] for i in range(nb): int32_array = data[i][ct.default_int32_array_field_name] float_array = data[i][ct.default_float_array_field_name] string_array = data[i][ct.default_string_array_field_name] - if not expression or eval(expression): + if not expr or eval(expr): filter_ids.append(i) # 4. delete by array expression + res = collection_w.delete(expr)[0] + assert res.delete_count == len(filter_ids) + + # 5. query to check + collection_w.query(expr, check_task=CheckTasks.check_query_empty) + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("expressions", cf.gen_array_field_expressions_and_templates()) + @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + def test_delete_array_expressions_templates(self, expressions, enable_dynamic_field): + """ + target: test delete entities using normal expression + method: delete using normal expression + expected: delete successfully + """ + # 1. create a collection + nb = ct.default_nb + schema = cf.gen_array_collection_schema() + collection_w = self.init_collection_wrap(schema=schema, enable_dynamic_field=enable_dynamic_field) + + # 2. 
insert data + array_length = 100 + data = [] + for i in range(nb): + arr = {ct.default_int64_field_name: i, + ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0], + ct.default_int32_array_field_name: [np.int32(i) for i in range(array_length)], + ct.default_float_array_field_name: [np.float32(i) for i in range(array_length)], + ct.default_string_array_field_name: [str(i) for i in range(array_length)]} + data.append(arr) + collection_w.insert(data) + collection_w.flush() collection_w.create_index(ct.default_float_vec_field_name, ct.default_flat_index) collection_w.load() - res = collection_w.delete(expression)[0] + + # 3. filter result with expression in collection + expr = expressions[0].replace("&&", "and").replace("||", "or") + filter_ids = [] + for i in range(nb): + int32_array = data[i][ct.default_int32_array_field_name] + float_array = data[i][ct.default_float_array_field_name] + string_array = data[i][ct.default_string_array_field_name] + if not expr or eval(expr): + filter_ids.append(i) + + # 4. delete by array expression + expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or") + expr_params = cf.get_expr_params_from_template(expressions[1]) + res = collection_w.delete(expr=expr, expr_params=expr_params)[0] assert res.delete_count == len(filter_ids) # 5. query to check - collection_w.query(expression, check_task=CheckTasks.check_query_empty) + collection_w.query(expr=expr, expr_params=expr_params, check_task=CheckTasks.check_query_empty) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("field_name", ["varchar", "json_field['string']", "NewStr"]) @@ -2069,7 +2152,7 @@ def test_delete_expr_empty_string(self): collection_w = self.init_collection_general(prefix, True)[0] # delete - error = {ct.err_code: 1, ct.err_msg: "expr cannot be empty"} + error = {ct.err_code: 1, ct.err_msg: "cannot be empty"} collection_w.delete(expr="", check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -2206,9 +2289,9 @@ def test_delete_expr_compare_two_variables(self, expressions): collection_w.delete(expressions, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("expression", cf.gen_json_field_expressions()) + @pytest.mark.parametrize("expressions", cf.gen_json_field_expressions_and_templates()) @pytest.mark.parametrize("enable_dynamic_field", [True, False]) - def test_delete_expr_json_field(self, expression, enable_dynamic_field): + def test_delete_expr_json_field(self, expressions, enable_dynamic_field): """ target: test delete entities using normal expression method: delete using normal expression @@ -2220,7 +2303,7 @@ def test_delete_expr_json_field(self, expression, enable_dynamic_field): # filter result with expression in collection _vectors = _vectors[0] - expression = expression.replace("&&", "and").replace("||", "or") + expr = expressions[0].replace("&&", "and").replace("||", "or") filter_ids = [] json_field = {} for i, _id in enumerate(insert_ids): @@ -2230,21 +2313,20 @@ def test_delete_expr_json_field(self, expression, enable_dynamic_field): else: json_field['number'] = _vectors[ct.default_json_field_name][i]['number'] json_field['float'] = _vectors[ct.default_json_field_name][i]['float'] - if not expression or eval(expression): + if not expr or eval(expr): filter_ids.append(_id) # delete with expressions - res = collection_w.delete(expression)[0] + res = collection_w.delete(expr)[0] assert res.delete_count == len(filter_ids) # query to check 
collection_w.query(f"int64 in {filter_ids}", check_task=CheckTasks.check_query_empty) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("normal_expression, json_expression", zip(cf.gen_normal_expressions()[1:4], - cf.gen_json_field_expressions()[6:9])) + @pytest.mark.parametrize("expressions", cf.gen_json_field_expressions_and_templates()) @pytest.mark.parametrize("enable_dynamic_field", [True, False]) - def test_delete_expr_complex_mixed(self, normal_expression, json_expression, enable_dynamic_field): + def test_delete_expr_templtes_json_field(self, expressions, enable_dynamic_field): """ target: test delete entities using normal expression method: delete using normal expression @@ -2255,9 +2337,47 @@ def test_delete_expr_complex_mixed(self, normal_expression, json_expression, ena self.init_collection_general(prefix, True, enable_dynamic_field=enable_dynamic_field)[0:4] # filter result with expression in collection - expression = normal_expression + ' and ' + json_expression _vectors = _vectors[0] - expression = expression.replace("&&", "and").replace("||", "or") + expr = expressions[0].replace("&&", "and").replace("||", "or") + filter_ids = [] + json_field = {} + for i, _id in enumerate(insert_ids): + if enable_dynamic_field: + json_field['number'] = _vectors[i][ct.default_json_field_name]['number'] + json_field['float'] = _vectors[i][ct.default_json_field_name]['float'] + else: + json_field['number'] = _vectors[ct.default_json_field_name][i]['number'] + json_field['float'] = _vectors[ct.default_json_field_name][i]['float'] + if not expr or eval(expr): + filter_ids.append(_id) + + # delete with expressions template + expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or") + expr_params = cf.get_expr_params_from_template(expressions[1]) + res = collection_w.delete(expr=expr, expr_params=expr_params)[0] + assert res.delete_count == len(filter_ids) + + # query to check + collection_w.query(f"int64 in {filter_ids}", check_task=CheckTasks.check_query_empty) + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("normal_expressions, json_expressions", zip(cf.gen_normal_expressions_and_templates()[1:4], + cf.gen_json_field_expressions_and_templates()[6:9])) + @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + def test_delete_expr_complex_mixed(self, normal_expressions, json_expressions, enable_dynamic_field): + """ + target: test delete entities using normal expression + method: delete using normal expression + expected: delete successfully + """ + # init collection with nb default data + collection_w, _vectors, _, insert_ids = \ + self.init_collection_general(prefix, True, enable_dynamic_field=enable_dynamic_field)[0:4] + + # filter result with expression in collection + expr = normal_expressions[0] + ' and ' + json_expressions[0] + _vectors = _vectors[0] + expr = expr.replace("&&", "and").replace("||", "or") filter_ids = [] json_field = {} for i, _id in enumerate(insert_ids): @@ -2271,11 +2391,14 @@ def test_delete_expr_complex_mixed(self, normal_expression, json_expression, ena json_field['float'] = _vectors[ct.default_json_field_name][i]['float'] int64 = _vectors.int64[i] float = _vectors.float[i] - if not expression or eval(expression): + if not expr or eval(expr): filter_ids.append(_id) - # delete with expressions - res = collection_w.delete(expression)[0] + # delete with expressions and template mixed + json_expr = cf.get_expr_from_template(json_expressions[1]).replace("&&", "and").replace("||", "or") + expr = 
normal_expressions[0] + ' and ' + json_expr + json_expr_params = cf.get_expr_params_from_template(json_expressions[1]) + res = collection_w.delete(expr=expr, expr_params=json_expr_params)[0] assert res.delete_count == len(filter_ids) # query to check diff --git a/tests/python_client/testcases/test_field_partial_load.py b/tests/python_client/testcases/test_field_partial_load.py index 43ab9d3e5d38f..16d0541105167 100644 --- a/tests/python_client/testcases/test_field_partial_load.py +++ b/tests/python_client/testcases/test_field_partial_load.py @@ -70,7 +70,6 @@ def test_field_partial_load_default(self): and not_load_int64_field.name in res[0][0].fields.keys() @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.xfail(reason="issue #36353") def test_skip_load_dynamic_field(self): """ target: test skip load dynamic field diff --git a/tests/python_client/testcases/test_index.py b/tests/python_client/testcases/test_index.py index 950e3fe75367b..0a66545330cad 100644 --- a/tests/python_client/testcases/test_index.py +++ b/tests/python_client/testcases/test_index.py @@ -98,13 +98,9 @@ def test_index_type_invalid(self, index_type): collection_w = self.init_collection_wrap(name=c_name) index_params = copy.deepcopy(default_index_params) index_params["index_type"] = index_type - if not isinstance(index_params["index_type"], str): - msg = "must be str" - else: - msg = "invalid index type" self.index_wrap.init_index(collection_w.collection, default_field_name, index_params, check_task=CheckTasks.err_res, - check_items={ct.err_code: 1100, ct.err_msg: msg}) + check_items={ct.err_code: 1100, ct.err_msg: "invalid parameter["}) @pytest.mark.tags(CaseLabel.L1) def test_index_type_not_supported(self): @@ -238,8 +234,8 @@ def test_index_create_on_scalar_field(self): collection_w.create_index(ct.default_int64_field_name, {}) collection_w.load(check_task=CheckTasks.err_res, check_items={ct.err_code: 65535, - ct.err_msg: f"there is no vector index on field: [float_vector], " - f"please create index firstly: collection={collection_w.name}: index not found"}) + ct.err_msg: "there is no vector index on field: [float_vector], " + "please create index firstly"}) @pytest.mark.tags(CaseLabel.L2) def test_index_create_on_array_field(self): @@ -1092,9 +1088,8 @@ def test_create_index_invalid_metric_type_binary(self): binary_index_params = {'index_type': 'BIN_IVF_FLAT', 'metric_type': 'L2', 'params': {'nlist': 64}} collection_w.create_index(default_binary_vec_field_name, binary_index_params, index_name=binary_field_name, check_task=CheckTasks.err_res, - check_items={ct.err_code: 1100, - ct.err_msg: "metric type L2 not found or not supported, supported: " - "[HAMMING JACCARD SUBSTRUCTURE SUPERSTRUCTURE]"}) + check_items={ct.err_code: 999, + ct.err_msg: "binary vector index does not support metric type: L2"}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE", "JACCARD", "HAMMING"]) @@ -1107,12 +1102,12 @@ def test_create_binary_index_HNSW(self, metric_type): c_name = cf.gen_unique_str(prefix) collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema) binary_index_params = {'index_type': 'HNSW', "M": '18', "efConstruction": '240', 'metric_type': metric_type} + error = {ct.err_code: 999, ct.err_msg: f"binary vector index does not support metric type: {metric_type}"} + if metric_type in ["JACCARD", "HAMMING"]: + error = {ct.err_code: 999, ct.err_msg: f"data type BinaryVector can't build with this index HNSW"} collection_w.create_index(default_binary_vec_field_name, 
binary_index_params, check_task=CheckTasks.err_res, - check_items={ct.err_code: 1100, - ct.err_msg: "HNSW only support float vector data type: invalid " - "parameter[expected=valid index params][actual=invalid " - "index params]"}) + check_items=error) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("metric", ct.binary_metrics) @@ -1257,7 +1252,7 @@ def test_create_index_json(self): collection_w.create_index(ct.default_json_field_name, check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, - ct.err_msg: "create index on JSON field is not supported"}) + ct.err_msg: "create auto index on type:JSON is not supported"}) @pytest.mark.tags(CaseLabel.L1) def test_create_scalar_index_on_vector_field(self, scalar_index, vector_data_type): @@ -1286,7 +1281,7 @@ def test_create_scalar_index_on_binary_vector_field(self, scalar_index): collection_w.create_index(ct.default_binary_vec_field_name, index_params=scalar_index_params, check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, - ct.err_msg: f"invalid index type: {scalar_index}"}) + ct.err_msg: "metric type not set for vector index"}) @pytest.mark.tags(CaseLabel.L1) def test_create_inverted_index_on_json_field(self, vector_data_type): @@ -1300,7 +1295,7 @@ def test_create_inverted_index_on_json_field(self, vector_data_type): collection_w.create_index(ct.default_json_field_name, index_params=scalar_index_params, check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, - ct.err_msg: "create index on JSON field is not supported"}) + ct.err_msg: "INVERTED are not supported on JSON field"}) @pytest.mark.tags(CaseLabel.L1) def test_create_inverted_index_on_array_field(self): @@ -1433,7 +1428,7 @@ def test_alter_index_invalid(self): collection_w.alter_index(ct.default_index_name, {"error_param_key": 123}, check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, - ct.err_msg: f"error_param is not configable index param"}) + ct.err_msg: f"error_param_key is not configable index param"}) collection_w.alter_index(ct.default_index_name, ["error_param_type"], check_task=CheckTasks.err_res, check_items={ct.err_code: 1, @@ -1483,8 +1478,8 @@ def test_invalid_sparse_ratio(self, ratio, index): data = cf.gen_default_list_sparse_data() collection_w.insert(data=data) params = {"index_type": index, "metric_type": "IP", "params": {"drop_ratio_build": ratio}} - error = {ct.err_code: 1100, - ct.err_msg: f"invalid drop_ratio_build: {ratio}, must be in range [0, 1): invalid parameter[expected=valid index params"} + error = {ct.err_code: 999, + ct.err_msg: f"Out of range in json: param 'drop_ratio_build' ({ratio*1.0}) should be in range [0.000000, 1.000000)"} index, _ = self.index_wrap.init_index(collection_w.collection, ct.default_sparse_vec_field_name, params, check_task=CheckTasks.err_res, check_items=error) @@ -2016,7 +2011,7 @@ def test_create_diskann_index_with_binary(self): collection_w.create_index(default_binary_vec_field_name, ct.default_diskann_index, index_name=binary_field_name, check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, - ct.err_msg: "float or float16 vector are only supported"}) + ct.err_msg: "binary vector index does not support metric type: COSINE"}) @pytest.mark.tags(CaseLabel.L2) def test_create_diskann_index_multithread(self): @@ -2181,7 +2176,7 @@ def test_create_scann_index_nlist_invalid(self, nlist): """ collection_w = self.init_collection_general(prefix, is_index=False)[0] index_params = {"index_type": "SCANN", "metric_type": "L2", "params": {"nlist": nlist}} - error = {ct.err_code: 1100, 
ct.err_msg: "nlist out of range: [1, 65536]"} + error = {ct.err_code: 999, ct.err_msg: f"Out of range in json: param 'nlist' ({nlist}) should be in range [1, 65536]"} collection_w.create_index(default_field_name, index_params, check_task=CheckTasks.err_res, check_items=error) @@ -2196,7 +2191,7 @@ def test_create_scann_index_dim_invalid(self, dim): collection_w = self.init_collection_general(prefix, is_index=False, dim=dim)[0] index_params = {"index_type": "SCANN", "metric_type": "L2", "params": {"nlist": 1024}} error = {ct.err_code: 1100, - ct.err_msg: f"dimension must be able to be divided by 2, dimension: {dim}"} + ct.err_msg: f"The dimension of a vector (dim) should be a multiple of 2. Dimension:{dim}"} collection_w.create_index(default_field_name, index_params, check_task=CheckTasks.err_res, check_items=error) @@ -2386,7 +2381,7 @@ def test_bitmap_on_not_supported_fields(self, request): for msg, index_params in { iem.VectorMetricTypeExist: IndexPrams(index_type=IndexName.BITMAP), iem.SparseFloatVectorMetricType: IndexPrams(index_type=IndexName.BITMAP, metric_type=MetricType.L2), - iem.CheckVectorIndex.format(DataType.SPARSE_FLOAT_VECTOR, IndexName.BITMAP): IndexPrams( + iem.CheckVectorIndex.format("SparseFloatVector", IndexName.BITMAP): IndexPrams( index_type=IndexName.BITMAP, metric_type=MetricType.IP) }.items(): self.collection_wrap.create_index( diff --git a/tests/python_client/testcases/test_insert.py b/tests/python_client/testcases/test_insert.py index c7c317250f77b..45d00e33344e6 100644 --- a/tests/python_client/testcases/test_insert.py +++ b/tests/python_client/testcases/test_insert.py @@ -79,7 +79,7 @@ def test_insert_non_data_type(self): @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("data", [pd.DataFrame()]) - def test_insert_empty_data(self, data): + def test_insert_empty_dataframe(self, data): """ target: test insert empty dataFrame() method: insert empty @@ -101,7 +101,7 @@ def test_insert_empty_data(self, data): """ c_name = cf.gen_unique_str(prefix) collection_w = self.init_collection_wrap(name=c_name) - error = {ct.err_code: 999, ct.err_msg: "The data don't match with schema fields"} + error = {ct.err_code: 999, ct.err_msg: "The data doesn't match with schema fields"} collection_w.insert( data=data, check_task=CheckTasks.err_res, check_items=error) @@ -134,7 +134,7 @@ def test_insert_empty_field_name_dataframe(self): df = cf.gen_default_dataframe_data(10) df.rename(columns={ct.default_int64_field_name: ' '}, inplace=True) error = {ct.err_code: 999, - ct.err_msg: "The name of field don't match, expected: int64"} + ct.err_msg: "The name of field doesn't match, expected: int64"} collection_w.insert( data=df, check_task=CheckTasks.err_res, check_items=error) @@ -152,7 +152,7 @@ def test_insert_invalid_field_name_dataframe(self): df.rename( columns={ct.default_int64_field_name: invalid_field_name}, inplace=True) error = {ct.err_code: 999, - ct.err_msg: f"The name of field don't match, expected: int64, got {invalid_field_name}"} + ct.err_msg: f"The name of field doesn't match, expected: int64, got {invalid_field_name}"} collection_w.insert( data=df, check_task=CheckTasks.err_res, check_items=error) @@ -218,6 +218,7 @@ def test_insert_single(self): assert collection_w.num_entities == 1 @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.skip(reason="issue #37543") def test_insert_dim_not_match(self): """ target: test insert with not match dim @@ -227,8 +228,8 @@ def test_insert_dim_not_match(self): c_name = cf.gen_unique_str(prefix) collection_w = 
self.init_collection_wrap(name=c_name) dim = 129 - df = cf.gen_default_dataframe_data(ct.default_nb, dim=dim) - error = {ct.err_code: 65535, + df = cf.gen_default_dataframe_data(nb=20, dim=dim) + error = {ct.err_code: 999, ct.err_msg: f'Collection field dim is {ct.default_dim}, but entities field dim is {dim}'} collection_w.insert(data=df, check_task=CheckTasks.err_res, check_items=error) @@ -246,7 +247,7 @@ def test_insert_binary_dim_not_match(self): df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb, dim=dim) error = {ct.err_code: 1100, ct.err_msg: f'the dim ({dim}) of field data(binary_vector) is not equal to schema dim ' - f'({ct.default_dim}): invalid parameter[expected={dim}][actual={ct.default_dim}]'} + f'({ct.default_dim}): invalid parameter[expected={ct.default_dim}][actual={dim}]'} collection_w.insert(data=df, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -260,7 +261,7 @@ def test_insert_field_name_not_match(self): collection_w = self.init_collection_wrap(name=c_name) df = cf.gen_default_dataframe_data(10) df.rename(columns={ct.default_float_field_name: "int"}, inplace=True) - error = {ct.err_code: 999, ct.err_msg: "The name of field don't match, expected: float, got int"} + error = {ct.err_code: 999, ct.err_msg: "The name of field doesn't match, expected: float, got int"} collection_w.insert( data=df, check_task=CheckTasks.err_res, check_items=error) @@ -337,7 +338,7 @@ def test_insert_fields_more(self): field_data = cf.gen_data_by_collection_field(fields, nb=nb) data.append(field_data) data.append([1 for _ in range(nb)]) - error = {ct.err_code: 999, ct.err_msg: "The data don't match with schema fields"} + error = {ct.err_code: 999, ct.err_msg: "The data doesn't match with schema fields"} collection_w.insert( data=data, check_task=CheckTasks.err_res, check_items=error) @@ -533,7 +534,7 @@ def test_insert_with_no_vector_field_dtype(self): field_data = cf.gen_data_by_collection_field(field, nb=nb) if field.dtype != DataType.FLOAT_VECTOR: data.append(field_data) - error = {ct.err_code: 999, ct.err_msg: f"The data don't match with schema fields, " + error = {ct.err_code: 999, ct.err_msg: f"The data doesn't match with schema fields, " f"expect {len(fields)} list, got {len(data)}"} collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error) @@ -1320,8 +1321,7 @@ def test_insert_int8_overflow(self, invalid_int8): collection_w = self.init_collection_general(prefix, is_all_data_type=True)[0] data = cf.gen_dataframe_all_data_type(nb=1) data[ct.default_int8_field_name] = [invalid_int8] - error = {ct.err_code: 1100, 'err_msg': "The data type of field int8 doesn't match, " - "expected: INT8, got INT64"} + error = {ct.err_code: 1100, ct.err_msg: f"the 0th element ({invalid_int8}) out of range: [-128, 127]"} collection_w.insert(data, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -1335,8 +1335,7 @@ def test_insert_int16_overflow(self, invalid_int16): collection_w = self.init_collection_general(prefix, is_all_data_type=True)[0] data = cf.gen_dataframe_all_data_type(nb=1) data[ct.default_int16_field_name] = [invalid_int16] - error = {ct.err_code: 1100, 'err_msg': "The data type of field int16 doesn't match, " - "expected: INT16, got INT64"} + error = {ct.err_code: 1100, ct.err_msg: f"the 0th element ({invalid_int16}) out of range: [-32768, 32767]"} collection_w.insert(data, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -2239,7 +2238,7 @@ def 
test_upsert_vector_unmatch(self): collection_w = self.init_collection_wrap(name=c_name, with_json=False) data = cf.gen_default_binary_dataframe_data()[0] error = {ct.err_code: 999, - ct.err_msg: "The name of field don't match, expected: float_vector, got binary_vector"} + ct.err_msg: "The name of field doesn't match, expected: float_vector, got binary_vector"} collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -2254,7 +2253,7 @@ def test_upsert_binary_dim_unmatch(self, dim): collection_w = self.init_collection_general(pre_upsert, True, is_binary=True)[0] data = cf.gen_default_binary_dataframe_data(dim=dim)[0] error = {ct.err_code: 1100, - ct.err_msg: f"Collection field dim is 128, but entities field dim is {dim}"} + ct.err_msg: f"the dim ({dim}) of field data(binary_vector) is not equal to schema dim ({ct.default_dim})"} collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -2501,10 +2500,9 @@ def test_insert_array_length_invalid(self): collection_w = self.init_collection_wrap(schema=schema) # Insert actual array length > max_capacity arr_len = ct.default_max_capacity + 1 - data = cf.gen_row_data_by_schema(schema=schema,nb=11) + data = cf.gen_row_data_by_schema(schema=schema, nb=11) data[1][ct.default_float_array_field_name] = [np.float32(i) for i in range(arr_len)] - err_msg = (f"the length (101) of 1th array exceeds max capacity ({ct.default_max_capacity}): " - f"expected=valid length array, actual=array length exceeds max capacity: invalid parameter") + err_msg = (f"the length ({arr_len}) of 1th array exceeds max capacity ({ct.default_max_capacity})") collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: err_msg}) diff --git a/tests/python_client/testcases/test_mix_scenes.py b/tests/python_client/testcases/test_mix_scenes.py index 80f8171c14ccb..b5f45843a5af4 100644 --- a/tests/python_client/testcases/test_mix_scenes.py +++ b/tests/python_client/testcases/test_mix_scenes.py @@ -873,7 +873,6 @@ def check_query_res(self, res, expr_field: str) -> list: return [(r[self.primary_field], r[expr_field], real_data[r[self.primary_field]]) for r in res if r[expr_field] != real_data[r[self.primary_field]]] - # https://github.com/milvus-io/milvus/issues/36221 @pytest.mark.tags(CaseLabel.L1) def test_bitmap_index_query_with_invalid_array_params(self): """ diff --git a/tests/python_client/testcases/test_partition.py b/tests/python_client/testcases/test_partition.py index cf2cb19e7390b..0af8ee21553bf 100644 --- a/tests/python_client/testcases/test_partition.py +++ b/tests/python_client/testcases/test_partition.py @@ -232,7 +232,7 @@ def test_partition_none_collection(self): self.partition_wrap.init_partition(collection=None, name=partition_name, check_task=CheckTasks.err_res, check_items={ct.err_code: 1, - ct.err_msg: "must be pymilvus.Collection"}) + ct.err_msg: "Collection must be of type pymilvus.Collection or String"}) @pytest.mark.tags(CaseLabel.L1) def test_partition_drop(self): @@ -1003,8 +1003,10 @@ def test_partition_insert_mismatched_dimensions(self, dim): data = cf.gen_default_list_data(nb=10, dim=dim) # insert data to partition + # TODO: update the assert error msg as #37543 fixed partition_w.insert(data, check_task=CheckTasks.err_res, - check_items={ct.err_code: 65535, ct.err_msg: "but entities field dim"}) + check_items={ct.err_code: 65535, + ct.err_msg: f"float data should divide the dim({ct.default_dim})"}) 
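# The "float data should divide the dim" message asserted above reflects a
# server-side shape check: the flat list of floats must split evenly into
# dim-sized vectors. A minimal sketch of that check (names here are
# illustrative, not actual Milvus internals):
def rows_in_float_payload(flat_data, dim):
    """Return the vector count if the payload length is a multiple of dim."""
    if dim <= 0 or len(flat_data) % dim != 0:
        raise ValueError(f"float data should divide the dim({dim})")
    return len(flat_data) // dim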
    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("sync", [True, False])
@@ -1109,7 +1111,8 @@ def test_partition_upsert_mismatched_data(self):

        # upsert mismatched data
        upsert_data = cf.gen_default_data_for_upsert(dim=ct.default_dim-1)[0]
-        error = {ct.err_code: 65535, ct.err_msg: "Collection field dim is 128, but entities field dim is 127"}
+        # TODO: update the assert error msg as #37543 fixed
+        error = {ct.err_code: 65535, ct.err_msg: f"float data should divide the dim({ct.default_dim})"}
        partition_w.upsert(upsert_data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
diff --git a/tests/python_client/testcases/test_query.py b/tests/python_client/testcases/test_query.py
index e6b3335a724aa..3a05e8a124d05 100644
--- a/tests/python_client/testcases/test_query.py
+++ b/tests/python_client/testcases/test_query.py
@@ -79,9 +79,23 @@ def test_query_invalid(self):
        """
        collection_w, entities = self.init_collection_general(prefix, insert_data=True, nb=10)[0:2]
        term_expr = f'{default_int_field_name} in {entities[:default_pos]}'
-        error = {ct.err_code: 1100, ct.err_msg: "cannot parse expression: int64 in .."}
+        error = {ct.err_code: 999, ct.err_msg: "cannot parse expression: int64 in"}
        collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)

+        # check missing the template variable
+        expr = "int64 in {value_0}"
+        expr_params = {"value_1": [0, 1]}
+        error = {ct.err_code: 999, ct.err_msg: "the value of expression template variable name {value_0} is not found"}
+        collection_w.query(expr=expr, expr_params=expr_params,
+                           check_task=CheckTasks.err_res, check_items=error)
+
+        # check the template variable type mismatch
+        expr = "int64 in {value_0}"
+        expr_params = {"value_0": 1}
+        error = {ct.err_code: 999, ct.err_msg: "the value of term expression template variable {value_0} is not array"}
+        collection_w.query(expr=expr, expr_params=expr_params,
+                           check_task=CheckTasks.err_res, check_items=error)
+
    @pytest.mark.tags(CaseLabel.L0)
    def test_query(self, enable_dynamic_field):
        """
@@ -409,43 +423,45 @@ def test_query_expr_by_int8_field(self):
        self.collection_wrap.query(term_expr, output_fields=["float", "int64", "int8", "varchar"],
                                   check_task=CheckTasks.check_query_results, check_items={exp_res: res})

-    @pytest.fixture(scope="function", params=cf.gen_normal_expressions())
-    def get_normal_expr(self, request):
-        if request.param == "":
-            pytest.skip("query with "" expr is invalid")
-        yield request.param

    @pytest.mark.tags(CaseLabel.L1)
-    def test_query_with_expression(self, get_normal_expr, enable_dynamic_field):
+    def test_query_with_expression(self, enable_dynamic_field):
        """
        target: test query with different expr
        method: query with different boolean expr
        expected: verify query result
        """
        # 1. 
initialize with data - nb = 1000 + nb = 2000 collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb, enable_dynamic_field=enable_dynamic_field)[0:4] # filter result with expression in collection _vectors = _vectors[0] - expr = get_normal_expr - expression = expr.replace("&&", "and").replace("||", "or") - filter_ids = [] - for i, _id in enumerate(insert_ids): - if enable_dynamic_field: - int64 = _vectors[i][ct.default_int64_field_name] - float = _vectors[i][ct.default_float_field_name] - else: - int64 = _vectors.int64[i] - float = _vectors.float[i] - if not expression or eval(expression): - filter_ids.append(_id) - - # query and verify result - res = collection_w.query(expr=expression)[0] - query_ids = set(map(lambda x: x[ct.default_int64_field_name], res)) - assert query_ids == set(filter_ids) + for expressions in cf.gen_normal_expressions_and_templates(): + log.debug(f"test_query_with_expression: {expressions}") + expr = expressions[0].replace("&&", "and").replace("||", "or") + filter_ids = [] + for i, _id in enumerate(insert_ids): + if enable_dynamic_field: + int64 = _vectors[i][ct.default_int64_field_name] + float = _vectors[i][ct.default_float_field_name] + else: + int64 = _vectors.int64[i] + float = _vectors.float[i] + if not expr or eval(expr): + filter_ids.append(_id) + + # query and verify result + res = collection_w.query(expr=expr, limit=nb)[0] + query_ids = set(map(lambda x: x[ct.default_int64_field_name], res)) + assert query_ids == set(filter_ids) + + # query again with expression template + expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or") + expr_params = cf.get_expr_params_from_template(expressions[1]) + res = collection_w.query(expr=expr, expr_params=expr_params, limit=nb)[0] + query_ids = set(map(lambda x: x[ct.default_int64_field_name], res)) + assert query_ids == set(filter_ids) @pytest.mark.tags(CaseLabel.L2) def test_query_expr_wrong_term_keyword(self): @@ -560,13 +576,16 @@ def test_query_expr_non_array_term(self): expected: raise exception """ exprs = [f'{ct.default_int64_field_name} in 1', - f'{ct.default_int64_field_name} in "in"', - f'{ct.default_int64_field_name} in (mn)'] + f'{ct.default_int64_field_name} in "in"'] collection_w, vectors = self.init_collection_general(prefix, insert_data=True)[0:2] - error = {ct.err_code: 1100, ct.err_msg: "cannot parse expression: int64 in 1, " - "error: line 1:9 no viable alternative at input 'in1'"} for expr in exprs: + error = {ct.err_code: 1100, ct.err_msg: f"cannot parse expression: {expr}, " + "error: the right-hand side of 'in' must be a list"} collection_w.query(expr, check_task=CheckTasks.err_res, check_items=error) + expr = f'{ct.default_int64_field_name} in (mn)' + error = {ct.err_code: 1100, ct.err_msg: f"cannot parse expression: {expr}, " + "error: field mn not exist"} + collection_w.query(expr, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) def test_query_expr_empty_term_array(self): @@ -589,12 +608,19 @@ def test_query_expr_inconsistent_mix_term_array(self): expected: raise exception """ collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix)) - int_values = [[1., 2.], [1, 2.]] + values = [1., 2.] 
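+        # float literals in the term list cannot be cast to the Int64 field,
+        # so expression parsing is expected to fail (see the asserted message below)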
+        term_expr = f'{ct.default_int64_field_name} in {values}'
        error = {ct.err_code: 1100,
-                 ct.err_msg: "failed to create query plan: cannot parse expression: int64 in [1, 2.0]"}
-        for values in int_values:
-            term_expr = f'{ct.default_int64_field_name} in {values}'
-            collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)
+                 ct.err_msg: f"cannot parse expression: int64 in {values}, "
+                             "error: value 'float_val:1' in list cannot be casted to Int64"}
+        collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)
+
+        values = [1, 2.]
+        term_expr = f'{ct.default_int64_field_name} in {values}'
+        error = {ct.err_code: 1100,
+                 ct.err_msg: f"cannot parse expression: int64 in {values}, "
+                             "error: value 'float_val:2' in list cannot be casted to Int64"}
+        collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_query_expr_non_constant_array_term(self):
@@ -605,10 +631,9 @@ def test_query_expr_non_constant_array_term(self):
        """
        target: test query with non-constant array term expr
        method: query with non-constant array expr
        expected: raise exception
        """
        collection_w, vectors = self.init_collection_general(prefix, insert_data=True)[0:2]
        constants = [[1], (), {}]
-        error = {ct.err_code: 1100,
-                 ct.err_msg: "cannot parse expression: int64 in [[1]], error: value '[1]' in "
-                             "list cannot be casted to Int64"}
        for constant in constants:
+            error = {ct.err_code: 1100,
+                     ct.err_msg: f"cannot parse expression: int64 in [{constant}]"}
            term_expr = f'{ct.default_int64_field_name} in [{constant}]'
            collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)

@@ -1797,7 +1822,7 @@ def test_query_binary_pagination(self, offset):
        assert key_res == int_values[offset: pos + offset]

    @pytest.mark.tags(CaseLabel.L2)
-    def test_query_pagination_with_expression(self, offset, get_normal_expr):
+    def test_query_pagination_with_expression(self, offset):
        """
        target: test query pagination with different expression
        method: query with different expression and verify the result
@@ -1809,20 +1834,27 @@ def test_query_pagination_with_expression(self, offset):

        # filter result with expression in collection
        _vectors = _vectors[0]
-        expr = get_normal_expr
-        expression = expr.replace("&&", "and").replace("||", "or")
-        filter_ids = []
-        for i, _id in enumerate(insert_ids):
-            int64 = _vectors.int64[i]
-            float = _vectors.float[i]
-            if not expression or eval(expression):
-                filter_ids.append(_id)
-
-        # query and verify result
-        query_params = {"offset": offset, "limit": 10}
-        res = collection_w.query(expr=expression, params=query_params)[0]
-        key_res = [item[key] for item in res for key in item]
-        assert key_res == filter_ids
+        for expressions in cf.gen_normal_expressions_and_templates()[1:]:
+            expr = expressions[0].replace("&&", "and").replace("||", "or")
+            filter_ids = []
+            for i, _id in enumerate(insert_ids):
+                int64 = _vectors.int64[i]
+                float = _vectors.float[i]
+                if not expr or eval(expr):
+                    filter_ids.append(_id)
+
+            # query and verify result
+            query_params = {"offset": offset, "limit": 10}
+            res = collection_w.query(expr=expr, params=query_params)[0]
+            key_res = [item[key] for item in res for key in item]
+            assert key_res == filter_ids
+
+            # query again with expression template
+            expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or")
+            expr_params = cf.get_expr_params_from_template(expressions[1])
+            res = collection_w.query(expr=expr, expr_params=expr_params, params=query_params)[0]
+            key_res = [item[key] for item in res for key in item]
+            assert key_res == filter_ids
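# cf.get_expr_from_template and cf.get_expr_params_from_template are used
# throughout these tests, but their bodies are not shown in this diff. Given
# the template format visible above, e.g.
#     {"expr": "int64 in {value_0}", "expr_params": {"value_0": [0, 1]}},
# a minimal sketch of the two helpers (assumed, not the verified
# implementation) would be:
def get_expr_from_template(template):
    # the parameterized filter string, e.g. "int64 in {value_0}"
    return template.get("expr", "")

def get_expr_params_from_template(template):
    # the values to bind to each {value_N} placeholder
    return template.get("expr_params", {})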
@pytest.mark.tags(CaseLabel.L2) def test_query_pagination_with_partition(self, offset): @@ -1930,11 +1962,14 @@ def test_query_pagination_with_invalid_limit_value(self, limit): int_values = vectors[0][ct.default_int64_field_name].values.tolist() pos = 10 term_expr = f'{ct.default_int64_field_name} in {int_values[10: pos + 10]}' + error = {ct.err_code: 65535, + ct.err_msg: f"invalid max query result window, (offset+limit) should be in range [1, 16384], but got 67900"} + if limit == -1: + error = {ct.err_code: 65535, + ct.err_msg: f"invalid max query result window, limit [{limit}] is invalid, should be greater than 0"} collection_w.query(term_expr, offset=10, limit=limit, - check_task=CheckTasks.err_res, - check_items={ct.err_code: 65535, - ct.err_msg: f"invalid max query result window, (offset+limit) " - f"should be in range [1, 16384], but got {limit}"}) + check_task=CheckTasks.err_res, check_items=error) + @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("offset", ["12 s", " ", [0, 1], {2}]) @@ -1967,11 +2002,13 @@ def test_query_pagination_with_invalid_offset_value(self, offset): int_values = vectors[0][ct.default_int64_field_name].values.tolist() pos = 10 term_expr = f'{ct.default_int64_field_name} in {int_values[10: pos + 10]}' + error = {ct.err_code: 65535, + ct.err_msg: f"invalid max query result window, (offset+limit) should be in range [1, 16384], but got 67900"} + if offset == -1: + error = {ct.err_code: 65535, + ct.err_msg: f"invalid max query result window, offset [{offset}] is invalid, should be gte than 0"} collection_w.query(term_expr, offset=offset, limit=10, - check_task=CheckTasks.err_res, - check_items={ct.err_code: 65535, - ct.err_msg: f"invalid max query result window, (offset+limit) " - f"should be in range [1, 16384], but got {offset}"}) + check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.skip("not stable") @@ -2029,7 +2066,7 @@ def test_mmap_query_expr_empty_pk_string(self): check_task=CheckTasks.check_query_results, check_items={exp_res: res}) @pytest.mark.tags(CaseLabel.L1) - def test_enable_mmap_query_with_expression(self, get_normal_expr, enable_dynamic_field): + def test_enable_mmap_query_with_expression(self, enable_dynamic_field): """ target: turn on mmap use different expr queries method: turn on mmap and query with different expr @@ -2039,7 +2076,6 @@ def test_enable_mmap_query_with_expression(self, get_normal_expr, enable_dynamic nb = 1000 collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:4] - # enable mmap collection_w.set_properties({'mmap.enabled': True}) collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="query_expr_index") @@ -2047,23 +2083,31 @@ def test_enable_mmap_query_with_expression(self, get_normal_expr, enable_dynamic collection_w.load() # filter result with expression in collection _vectors = _vectors[0] - expr = get_normal_expr - expression = expr.replace("&&", "and").replace("||", "or") - filter_ids = [] - for i, _id in enumerate(insert_ids): - if enable_dynamic_field: - int64 = _vectors[i][ct.default_int64_field_name] - float = _vectors[i][ct.default_float_field_name] - else: - int64 = _vectors.int64[i] - float = _vectors.float[i] - if not expression or eval(expression): - filter_ids.append(_id) - - # query and verify result - res = collection_w.query(expr=expression)[0] - query_ids = set(map(lambda x: x[ct.default_int64_field_name], res)) - assert 
query_ids == set(filter_ids) + for expressions in cf.gen_normal_expressions_and_templates()[1:]: + log.debug(f"expr: {expressions}") + expr = expressions[0].replace("&&", "and").replace("||", "or") + filter_ids = [] + for i, _id in enumerate(insert_ids): + if enable_dynamic_field: + int64 = _vectors[i][ct.default_int64_field_name] + float = _vectors[i][ct.default_float_field_name] + else: + int64 = _vectors.int64[i] + float = _vectors.float[i] + if not expr or eval(expr): + filter_ids.append(_id) + + # query and verify result + res = collection_w.query(expr=expr)[0] + query_ids = set(map(lambda x: x[ct.default_int64_field_name], res)) + assert query_ids == set(filter_ids) + + # query again with expression template + expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or") + expr_params = cf.get_expr_params_from_template(expressions[1]) + res = collection_w.query(expr=expr, expr_params=expr_params)[0] + query_ids = set(map(lambda x: x[ct.default_int64_field_name], res)) + assert query_ids == set(filter_ids) @pytest.mark.tags(CaseLabel.L2) def test_mmap_query_string_field_not_primary_is_empty(self): @@ -2686,8 +2730,7 @@ def test_query_with_invalid_string_expr(self, expression): collection_w = self.init_collection_general(prefix, insert_data=True)[0] collection_w.query(expression, check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, - ct.err_msg: f"failed to create query plan: cannot parse expression: {expression}, " - f"error: value '1' in list cannot be casted to VarChar: invalid parameter"}) + ct.err_msg: f"failed to create query plan: cannot parse expression: {expression}"}) @pytest.mark.tags(CaseLabel.L1) def test_query_string_expr_with_binary(self): @@ -3823,8 +3866,7 @@ def test_count_disable_growing_segments(self): check_items={exp_res: [{count: 0}]}) @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("expression", cf.gen_normal_expressions()) - def test_count_expressions(self, expression): + def test_count_expressions(self): """ target: test count with expr method: count with expr @@ -3835,19 +3877,28 @@ def test_count_expressions(self): # filter result with expression in collection _vectors = _vectors[0] - expression = expression.replace("&&", "and").replace("||", "or") - filter_ids = [] - for i, _id in enumerate(insert_ids): - int64 = _vectors.int64[i] - float = _vectors.float[i] - if not expression or eval(expression): - filter_ids.append(_id) - res = len(filter_ids) + for expressions in cf.gen_normal_expressions_and_templates(): + log.debug(f"query with expression: {expressions}") + expr = expressions[0].replace("&&", "and").replace("||", "or") + filter_ids = [] + for i, _id in enumerate(insert_ids): + int64 = _vectors.int64[i] + float = _vectors.float[i] + if not expr or eval(expr): + filter_ids.append(_id) + res = len(filter_ids) - # count with expr - collection_w.query(expr=expression, output_fields=[count], - check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: res}]}) + # count with expr + collection_w.query(expr=expr, output_fields=[count], + check_task=CheckTasks.check_query_results, + check_items={exp_res: [{count: res}]}) + + # count again with expr template + expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or") + expr_params = cf.get_expr_params_from_template(expressions[1]) + collection_w.query(expr=expr, expr_params=expr_params, output_fields=[count], + check_task=CheckTasks.check_query_results, + check_items={exp_res: [{count: res}]}) 
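Note for reviewers: `cf.gen_normal_expressions_and_templates_field` (used by `test_count_expression_auto_field` and `test_search_with_expression_auto_id` below) is likewise not shown in this diff. By analogy with `gen_normal_expressions_and_templates`, it presumably yields [expression, template] pairs built over a single field name; an illustrative sketch under that assumption, with a deliberately reduced operator set:

def gen_normal_expressions_and_templates_field(field):
    # hypothetical reduced version; the real generator in common_func.py covers more operators
    return [
        ["", {"expr": "", "expr_params": {}}],
        [f"{field} > 10", {"expr": f"{field} > {{value_0}}", "expr_params": {"value_0": 10}}],
        [f"{field} not in [1.0, 2.0]",
         {"expr": f"{field} not in {{value_0}}", "expr_params": {"value_0": [1.0, 2.0]}}],
    ]

Each pair keeps the literal expression and its template equivalent together, so a test can evaluate one locally and run both against the server, asserting identical results.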
@pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("bool_type", [True, False, "true", "false"]) @@ -3885,8 +3936,7 @@ def test_count_bool_expressions(self, bool_type): check_items={exp_res: [{count: res}]}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("expression", cf.gen_normal_expressions_field(default_float_field_name)) - def test_count_expression_auto_field(self, expression): + def test_count_expression_auto_field(self): """ target: test count with expr method: count with expr @@ -3897,21 +3947,26 @@ def test_count_expression_auto_field(self, expression): # filter result with expression in collection _vectors = _vectors[0] - expression = expression.replace("&&", "and").replace("||", "or") - filter_ids = [] - for i, _id in enumerate(insert_ids): - float = _vectors.float[i] - if not expression or eval(expression): - filter_ids.append(_id) - res = len(filter_ids) + for expressions in cf.gen_normal_expressions_and_templates_field(default_float_field_name): + log.debug(f"query with expression: {expressions}") + expr = expressions[0].replace("&&", "and").replace("||", "or") + filter_ids = [] + for i, _id in enumerate(insert_ids): + float = _vectors.float[i] + if not expr or eval(expr): + filter_ids.append(_id) + res = len(filter_ids) - # count with expr - collection_w.query(expr=expression, output_fields=[count], - check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: res}]}) + # count with expr + collection_w.query(expr=expr, output_fields=[count], + check_task=CheckTasks.check_query_results, check_items={exp_res: [{count: res}]}) + # count with expr and expr_params + expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or") + expr_params = cf.get_expr_params_from_template(expressions[1]) + collection_w.query(expr=expr, expr_params=expr_params, output_fields=[count], + check_task=CheckTasks.check_query_results, check_items={exp_res: [{count: res}]}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason="issue #25841") def test_count_expression_all_datatype(self): """ target: test count with expr @@ -3922,9 +3977,8 @@ def test_count_expression_all_datatype(self): collection_w = self.init_collection_general(insert_data=True, is_all_data_type=True)[0] # count with expr - expression = "int64 >= 0 && int32 >= 1999 && int16 >= 0 && int8 >= 0 && float <= 1999.0 && double >= 0" - # expression = "int64 == 1999" - collection_w.query(expr=expression, output_fields=[count], + expr = "int64 >= 0 && int32 >= 1999 && int16 >= 0 && int8 <= 0 && float <= 1999.0 && double >= 0" + collection_w.query(expr=expr, output_fields=[count], check_task=CheckTasks.check_query_results, check_items={exp_res: [{count: 1}]}) @@ -5826,12 +5880,11 @@ def test_query_invalid(self): expected: raise exception """ collection_w, entities = self.init_collection_general( - prefix, insert_data=True, nb=10 - )[0:2] + prefix, insert_data=True, nb=10)[0:2] test_cases = [ ( - "A_FUNCTION_THAT_DOES_NOT_EXIST()", - "function A_FUNCTION_THAT_DOES_NOT_EXIST() not found", + "A_FUNCTION_THAT_DOES_NOT_EXIST()".lower(), + "function A_FUNCTION_THAT_DOES_NOT_EXIST() not found".lower(), ), # empty ("empty()", "function empty() not found"), diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py index 0be620f4d91c1..93584a2738688 100644 --- a/tests/python_client/testcases/test_search.py +++ b/tests/python_client/testcases/test_search.py @@ -36,7 +36,6 @@ pd.set_option("expand_frame_repr", False) - prefix = 
"search_collection" search_num = 10 max_dim = ct.max_dim @@ -141,7 +140,7 @@ def test_search_no_connection(self): default_search_exp, check_task=CheckTasks.err_res, check_items={"err_code": 1, - "err_msg": "should create connect first"}) + "err_msg": "should create connection first"}) @pytest.mark.tags(CaseLabel.L1) def test_search_no_collection(self): @@ -293,7 +292,7 @@ def test_search_invalid_params_type(self, index): if index == "FLAT": pytest.skip("skip in FLAT index") # 1. initialize with data - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, 5000, + collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, 2000, is_index=False)[0:4] # 2. create index and load params = cf.get_index_params_params(index) @@ -302,6 +301,7 @@ def test_search_invalid_params_type(self, index): collection_w.load() # 3. search invalid_search_params = cf.gen_invalid_search_params_type() + # TODO: update the error msg assertion as #37543 fixed for invalid_search_param in invalid_search_params: if index == invalid_search_param["index_type"]: search_params = {"metric_type": "L2", @@ -311,9 +311,8 @@ def test_search_invalid_params_type(self, index): search_params, default_limit, default_search_exp, check_task=CheckTasks.err_res, - check_items={"err_code": 65535, - "err_msg": "failed to search: invalid param in json:" - " invalid json key invalid_key"}) + check_items={"err_code": 999, + "err_msg": "fail to search on QueryNode"}) @pytest.mark.skip("not support now") @pytest.mark.tags(CaseLabel.L1) @@ -372,9 +371,7 @@ def test_search_param_invalid_limit_value(self, limit): # 1. initialize with data collection_w = self.init_collection_general(prefix)[0] # 2. search with invalid limit (topK) - log.info("test_search_param_invalid_limit_value: searching with " - "invalid limit (topK) = %s" % limit) - err_msg = f"topk [{limit}] is invalid, top k should be in range [1, 16384], but got {limit}" + err_msg = f"topk [{limit}] is invalid, it should be in range [1, 16384]" if limit == 0: err_msg = "`limit` value 0 is illegal" collection_w.search(vectors[:default_nq], default_search_field, default_search_params, @@ -482,21 +479,47 @@ def test_search_param_invalid_expr_bool(self, invalid_expr_bool_value): "err_msg": "failed to create query plan"}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("expression", cf.gen_invalid_bool_expressions()) - def test_search_with_expression_invalid_bool(self, expression): + def test_search_with_expression_invalid_bool(self): """ target: test search invalid bool method: test search invalid bool expected: searched failed """ collection_w = self.init_collection_general(prefix, True, is_all_data_type=True)[0] - log.info("test_search_with_expression: searching with expression: %s" % expression) + expressions = ["bool", "true", "false"] + for expression in expressions: + log.debug(f"search with expression: {expression}") + collection_w.search(vectors[:default_nq], default_search_field, + default_search_params, default_limit, expression, + check_task=CheckTasks.err_res, + check_items={"err_code": 1100, + "err_msg": "failed to create query plan: predicate is not a " + "boolean expression: %s, data type: Bool" % expression}) + expression = "!bool" + log.debug(f"search with expression: {expression}") + collection_w.search(vectors[:default_nq], default_search_field, + default_search_params, default_limit, expression, + check_task=CheckTasks.err_res, + check_items={"err_code": 1100, + "err_msg": "cannot parse expression: !bool, " + "error: not op can 
only be applied on boolean expression"}) + expression = "int64 > 0 and bool" + log.debug(f"search with expression: {expression}") + collection_w.search(vectors[:default_nq], default_search_field, + default_search_params, default_limit, expression, + check_task=CheckTasks.err_res, + check_items={"err_code": 1100, + "err_msg": "cannot parse expression: int64 > 0 and bool, " + "error: 'and' can only be used between boolean expressions"}) + expression = "int64 > 0 or false" + log.debug(f"search with expression: {expression}") collection_w.search(vectors[:default_nq], default_search_field, default_search_params, default_limit, expression, check_task=CheckTasks.err_res, check_items={"err_code": 1100, - "err_msg": "failed to create query plan: predicate is not a " - "boolean expression: %s, data type: Bool" % expression}) + "err_msg": "cannot parse expression: int64 > 0 or false, " + "error: 'or' can only be used between boolean expressions"}) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("expression", ["int64 like 33", "float LIKE 33"]) @@ -628,7 +651,8 @@ def test_search_with_output_fields_invalid_type(self, invalid_output_fields): ct.err_msg: err_msg}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("non_exiting_output_fields", [["non_exiting"], [ct.default_int64_field_name, "non_exiting"]]) + @pytest.mark.parametrize("non_exiting_output_fields", + [["non_exiting"], [ct.default_int64_field_name, "non_exiting"]]) def test_search_with_output_fields_non_existing(self, non_exiting_output_fields): """ target: test search with output fields @@ -711,7 +735,8 @@ def test_search_with_empty_collection(self, vector_data_type): # 2. search collection without data before load log.info("test_search_with_empty_collection: Searching empty collection %s" % collection_w.name) - err_msg = "collection" + collection_w.name + "was not loaded into memory" + # err_msg = "collection" + collection_w.name + "was not loaded into memory" + err_msg = "collection not loaded" vectors = cf.gen_vectors_based_on_vector_type(default_nq, default_dim, vector_data_type) collection_w.search(vectors[:default_nq], default_search_field, default_search_params, default_limit, default_search_exp, timeout=1, @@ -800,38 +825,6 @@ def test_search_partition_deleted(self): check_items={"err_code": 65535, "err_msg": "partition name search_partition_0 not found"}) - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index", ct.all_index_types[1:7]) - def test_search_different_index_invalid_params(self, index): - """ - target: test search with different index - method: test search with different index - expected: searched successfully - """ - # 1. initialize with data - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, 5000, - partition_num=1, - is_index=False)[0:4] - # 2. create different index - params = cf.get_index_params_params(index) - if params.get("m"): - if (default_dim % params["m"]) != 0: - params["m"] = default_dim // 4 - log.info("test_search_different_index_invalid_params: Creating index-%s" % index) - default_index = {"index_type": index, "params": params, "metric_type": "L2"} - collection_w.create_index("float_vector", default_index) - log.info("test_search_different_index_invalid_params: Created index-%s" % index) - collection_w.load() - # 3. 
search - log.info("test_search_different_index_invalid_params: Searching after " - "creating index-%s" % index) - search_params = cf.gen_invalid_search_param(index) - collection_w.search(vectors, default_search_field, - search_params[0], default_limit, - default_search_exp, - check_task=CheckTasks.err_res, - check_items={"err_code": 65535, "err_msg": "type must be number, but is string"}) - @pytest.mark.tags(CaseLabel.L2) def test_search_index_partition_not_existed(self): """ @@ -1287,7 +1280,7 @@ def test_search_dynamic_compare_two_fields(self): expr, check_task=CheckTasks.err_res, check_items={"err_code": 65535, - "err_msg": "UnknownError: unsupported right datatype JSON of compare expr"}) + "err_msg": "query failed: Operator::GetOutput failed"}) class TestCollectionSearch(TestcaseBase): @@ -1490,7 +1483,7 @@ def test_search_multi_vector_fields(self, nq, is_flush, vector_data_type): collection_w.search(vectors[:nq], search_field, default_search_params, default_limit, default_search_exp, - output_fields = [ct.default_float_field_name, ct.default_string_field_name], + output_fields=[ct.default_float_field_name, ct.default_string_field_name], check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, @@ -1610,14 +1603,14 @@ def test_accurate_search_with_multi_segments(self): # 4. get inserted original data inserted_vectors = collection_w.query(expr="int64 >= 0", output_fields=[ - ct.default_float_vec_field_name]) + ct.default_float_vec_field_name]) original_vectors = [] for single in inserted_vectors[0]: single_vector = single[ct.default_float_vec_field_name] original_vectors.append(single_vector) # 5. Calculate the searched ids - limit = 2*nb + limit = 2 * nb vectors = [[random.random() for _ in range(dim)] for _ in range(1)] distances = [] for original_vector in original_vectors: @@ -1786,13 +1779,13 @@ def test_search_collection_after_release_load(self, nq, _async): expected: search successfully """ # 1. initialize without data - nb= 2000 + nb = 2000 dim = 64 auto_id = True enable_dynamic_field = True - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nb, - 1, auto_id=auto_id, dim=dim, - enable_dynamic_field=enable_dynamic_field)[0:5] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, nb, 1, auto_id=auto_id, dim=dim, + enable_dynamic_field=enable_dynamic_field)[0:5] # 2. release collection log.info("test_search_collection_after_release_load: releasing collection %s" % collection_w.name) @@ -1973,8 +1966,7 @@ def test_search_min_dim(self, _async): auto_id = True enable_dynamic_field = False collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, 100, - auto_id=auto_id, - dim=min_dim, + auto_id=auto_id, dim=min_dim, enable_dynamic_field=enable_dynamic_field)[0:4] # 2. 
search nq = 2 @@ -2063,11 +2055,9 @@ def test_search_HNSW_index_with_max_ef(self, M, efConstruction, _async): auto_id = True enable_dynamic_field = False self._connect() - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, - partition_num=1, - auto_id=auto_id, - dim=dim, is_index=False, - enable_dynamic_field=enable_dynamic_field)[0:5] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, partition_num=1, auto_id=auto_id, + dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] HNSW_index_params = {"M": M, "efConstruction": efConstruction} HNSW_index = {"index_type": "HNSW", "params": HNSW_index_params, "metric_type": "L2"} @@ -2098,11 +2088,9 @@ def test_search_HNSW_index_with_redundant_param(self, M, efConstruction, _async) auto_id = False enable_dynamic_field = False self._connect() - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, - partition_num=1, - auto_id=auto_id, - dim=dim, is_index=False, - enable_dynamic_field=enable_dynamic_field)[0:5] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, partition_num=1, auto_id=auto_id, + dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] # nlist is of no use HNSW_index_params = { "M": M, "efConstruction": efConstruction, "nlist": 100} @@ -2138,11 +2126,10 @@ def test_search_HNSW_index_with_min_ef(self, M, efConstruction, limit, _async): auto_id = True enable_dynamic_field = True self._connect() - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, - partition_num=1, - auto_id=auto_id, - dim=dim, is_index=False, - enable_dynamic_field=enable_dynamic_field)[0:5] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, 5000, partition_num=1, + auto_id=auto_id, dim=dim, is_index=False, + enable_dynamic_field=enable_dynamic_field)[0:5] HNSW_index_params = {"M": M, "efConstruction": efConstruction} HNSW_index = {"index_type": "HNSW", "params": HNSW_index_params, "metric_type": "L2"} @@ -2173,12 +2160,10 @@ def test_search_after_different_index_with_params(self, index, _async, scalar_in dim = 64 auto_id = False enable_dynamic_field = False - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, - partition_num=1, - is_all_data_type=True, - auto_id=auto_id, - dim=dim, is_index=False, - enable_dynamic_field=enable_dynamic_field)[0:5] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, 5000, partition_num=1, is_all_data_type=True, + auto_id=auto_id, dim=dim, is_index=False, + enable_dynamic_field=enable_dynamic_field)[0:5] # 2. create index on vector field and load params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "COSINE"} @@ -2228,13 +2213,11 @@ def test_search_after_different_index_with_params_all_vector_type_multiple_vecto if index == "DISKANN": pytest.skip("https://github.com/milvus-io/milvus/issues/30793") # 1. 
initialize with data - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, - partition_num=1, - is_all_data_type=True, - auto_id=auto_id, - dim=default_dim, is_index=False, - enable_dynamic_field=enable_dynamic_field, - multiple_dim_array=[default_dim, default_dim])[0:5] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, 5000, partition_num=1, is_all_data_type=True, + auto_id=auto_id, dim=default_dim, is_index=False, + enable_dynamic_field=enable_dynamic_field, + multiple_dim_array=[default_dim, default_dim])[0:5] # 2. create index on vector field and load params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "COSINE"} @@ -2278,11 +2261,10 @@ def test_search_after_different_index_with_params_gpu(self, index, _async): dim = 64 auto_id = False enable_dynamic_field = False - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, - partition_num=1, - auto_id=auto_id, - dim=dim, is_index=False, - enable_dynamic_field=enable_dynamic_field)[0:5] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id, + dim=dim, is_index=False, + enable_dynamic_field=enable_dynamic_field)[0:5] # 2. create index and load params = cf.get_index_params_params(index) if params.get("m"): @@ -2419,11 +2401,9 @@ def test_search_after_index_different_metric_type(self, index, _async, metric_ty dim = 64 auto_id = True enable_dynamic_field = True - collection_w, _vectors, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, - partition_num=1, - auto_id=auto_id, - dim=dim, is_index=False, - enable_dynamic_field=enable_dynamic_field)[0:5] + collection_w, _vectors, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id, + dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] # 2. get vectors that inserted into collection original_vectors = [] if enable_dynamic_field: @@ -2486,11 +2466,9 @@ def test_search_after_release_recreate_index(self, index, _async, metric_type): dim = 64 auto_id = True enable_dynamic_field = False - collection_w, _vectors, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, - partition_num=1, - auto_id=auto_id, - dim=dim, is_index=False, - enable_dynamic_field=enable_dynamic_field)[0:5] + collection_w, _vectors, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id, + dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] # 2. get vectors that inserted into collection original_vectors = [] if enable_dynamic_field: @@ -2555,11 +2533,9 @@ def test_search_after_index_different_metric_type_gpu(self, index, _async): dim = 64 auto_id = True enable_dynamic_field = False - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, - partition_num=1, - auto_id=auto_id, - dim=dim, is_index=False, - enable_dynamic_field=enable_dynamic_field)[0:5] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id, + dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] # 2. 
create different index params = cf.get_index_params_params(index) if params.get("m"): @@ -2600,8 +2576,7 @@ def test_search_collection_multiple_times(self, nq, _async): auto_id = False enable_dynamic_field = False collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb, - auto_id=auto_id, - dim=dim, + auto_id=auto_id, dim=dim, enable_dynamic_field=enable_dynamic_field)[0:4] # 2. search for multiple times vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] @@ -2630,10 +2605,9 @@ def test_search_sync_async_multiple_times(self, nq): dim = 64 auto_id = True enable_dynamic_field = False - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nb, - auto_id=auto_id, - dim=dim, - enable_dynamic_field=enable_dynamic_field)[0:5] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, nb, auto_id=auto_id, dim=dim, + enable_dynamic_field=enable_dynamic_field)[0:5] # 2. search log.info("test_search_sync_async_multiple_times: searching collection %s" % collection_w.name) @@ -2695,11 +2669,9 @@ def test_search_index_one_partition(self, _async): nb = 1200 auto_id = False enable_dynamic_field = True - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nb, - partition_num=1, - auto_id=auto_id, - is_index=False, - enable_dynamic_field=enable_dynamic_field)[0:5] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, nb, partition_num=1, auto_id=auto_id, + is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] # 2. create index default_index = {"index_type": "IVF_FLAT", @@ -2737,10 +2709,8 @@ def test_search_index_partitions(self, nq, _async): nb = 1000 auto_id = False collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb, - partition_num=1, - auto_id=auto_id, - dim=dim, - is_index=False)[0:4] + partition_num=1, auto_id=auto_id, + dim=dim, is_index=False)[0:4] vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] # 2. create index default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"} @@ -2761,9 +2731,8 @@ def test_search_index_partitions(self, nq, _async): "_async": _async}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("partition_names", - [["(.*)"], ["search(.*)"]]) - def test_search_index_partitions_fuzzy(self, nq, partition_names): + @pytest.mark.parametrize("partition_names", [["(.*)"], ["search(.*)"]]) + def test_search_index_partitions_fuzzy(self, partition_names): """ target: test search from partitions method: search from partitions with fuzzy @@ -2775,12 +2744,10 @@ def test_search_index_partitions_fuzzy(self, nq, partition_names): dim = 64 auto_id = False enable_dynamic_field = False - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb, - partition_num=1, - auto_id=auto_id, - dim=dim, - is_index=False, + collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb, partition_num=1, + auto_id=auto_id, dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:4] + nq = 2 vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] # 2. create index nlist = 128 @@ -2788,21 +2755,14 @@ def test_search_index_partitions_fuzzy(self, nq, partition_names): collection_w.create_index("float_vector", default_index) collection_w.load() # 3. 
search through partitions - log.info("test_search_index_partitions_fuzzy: searching through partitions") - limit = 1000 - limit_check = limit - par = collection_w.partitions + limit = 100 search_params = {"metric_type": "COSINE", "params": {"nprobe": nlist}} - if partition_names == ["search(.*)"]: - insert_ids = insert_ids[par[0].num_entities:] - if limit > par[1].num_entities: - limit_check = par[1].num_entities collection_w.search(vectors[:nq], default_search_field, - search_params, limit, default_search_exp, - partition_names, + search_params, limit=limit, expr=default_search_exp, + partition_names=partition_names, check_task=CheckTasks.err_res, check_items={ct.err_code: 65535, - ct.err_msg: "partition name %s not found" % partition_names}) + ct.err_msg: f"partition name {partition_names[0]} not found"}) @pytest.mark.tags(CaseLabel.L2) def test_search_index_partition_empty(self, nq, _async): @@ -2814,8 +2774,7 @@ def test_search_index_partition_empty(self, nq, _async): # 1. initialize with data dim = 64 auto_id = True - collection_w = self.init_collection_general(prefix, True, auto_id=auto_id, - dim=dim, is_index=False)[0] + collection_w = self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=False)[0] vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] # 2. create empty partition partition_name = "search_partition_empty" @@ -2852,12 +2811,9 @@ def test_search_binary_jaccard_flat_index(self, nq, _async, index, is_flush): # 1. initialize with binary data dim = 64 auto_id = False - collection_w, _, binary_raw_vector, insert_ids, time_stamp = self.init_collection_general(prefix, True, 2, - is_binary=True, - auto_id=auto_id, - dim=dim, - is_index=False, - is_flush=is_flush)[0:5] + collection_w, _, binary_raw_vector, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, 2, is_binary=True, auto_id=auto_id, + dim=dim, is_index=False, is_flush=is_flush)[0:5] # 2. create index on sclalar and vector field default_index = {"index_type": "INVERTED", "params": {}} collection_w.create_index(ct.default_float_field_name, default_index) @@ -2896,12 +2852,9 @@ def test_search_binary_hamming_flat_index(self, nq, _async, index, is_flush): # 1. initialize with binary data dim = 64 auto_id = False - collection_w, _, binary_raw_vector, insert_ids = self.init_collection_general(prefix, True, 2, - is_binary=True, - auto_id=auto_id, - dim=dim, - is_index=False, - is_flush=is_flush)[0:4] + collection_w, _, binary_raw_vector, insert_ids = \ + self.init_collection_general(prefix, True, 2, is_binary=True, auto_id=auto_id, + dim=dim, is_index=False, is_flush=is_flush)[0:4] # 2. create index default_index = {"index_type": index, "params": { "nlist": 128}, "metric_type": "HAMMING"} @@ -2939,12 +2892,9 @@ def test_search_binary_tanimoto_flat_index(self, nq, _async, index, is_flush): # 1. initialize with binary data dim = 64 auto_id = False - collection_w, _, binary_raw_vector, insert_ids = self.init_collection_general(prefix, True, 2, - is_binary=True, - auto_id=auto_id, - dim=dim, - is_index=False, - is_flush=is_flush)[0:4] + collection_w, _, binary_raw_vector, insert_ids = \ + self.init_collection_general(prefix, True, 2, is_binary=True, auto_id=auto_id, + dim=dim, is_index=False, is_flush=is_flush)[0:4] log.info("auto_id= %s, _async= %s" % (auto_id, _async)) # 2. 
create index default_index = {"index_type": index, "params": { @@ -3076,65 +3026,83 @@ def test_search_binary_without_flush(self, metrics): "limit": default_limit}) @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("expression", cf.gen_normal_expressions()) - def test_search_with_expression(self, expression, _async, null_data_percent): + def test_search_with_expression(self, null_data_percent): """ target: test search with different expressions method: test search with different expressions expected: searched successfully with correct limit(topK) """ # 1. initialize with data - nb = 1000 + nb = 2000 dim = 64 enable_dynamic_field = False - collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, - nb, dim=dim, - is_index=False, - enable_dynamic_field=enable_dynamic_field, - nullable_fields={ct.default_float_field_name: null_data_percent})[0:4] - - # filter result with expression in collection - _vectors = _vectors[0] - expression = expression.replace("&&", "and").replace("||", "or") - filter_ids = [] - for i, _id in enumerate(insert_ids): - if enable_dynamic_field: - int64 = _vectors[i][ct.default_int64_field_name] - float = _vectors[i][ct.default_float_field_name] - else: - int64 = _vectors.int64[i] - float = _vectors.float[i] - if float is None and "float <=" in expression: - continue - if null_data_percent == 1 and "and float" in expression: - continue - if not expression or eval(expression): - filter_ids.append(_id) - + collection_w, _vectors, _, insert_ids = \ + self.init_collection_general(prefix, True, nb, dim=dim, is_index=False, + enable_dynamic_field=enable_dynamic_field, + nullable_fields={ct.default_float_field_name: null_data_percent})[0:4] # 2. create index index_param = {"index_type": "FLAT", "metric_type": "COSINE", "params": {}} collection_w.create_index("float_vector", index_param) collection_w.load() - # 3. search with expression - log.info("test_search_with_expression: searching with expression: %s" % expression) - vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] - search_res, _ = collection_w.search(vectors[:default_nq], default_search_field, - default_search_params, nb, expression, - _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": min(nb, len(filter_ids)), - "_async": _async}) - if _async: - search_res.done() - search_res = search_res.result() + # filter result with expression in collection + _vectors = _vectors[0] + for _async in [False, True]: + for expressions in cf.gen_normal_expressions_and_templates(): + log.debug(f"test_search_with_expression: {expressions}") + expr = expressions[0].replace("&&", "and").replace("||", "or") + filter_ids = [] + for i, _id in enumerate(insert_ids): + if enable_dynamic_field: + int64 = _vectors[i][ct.default_int64_field_name] + float = _vectors[i][ct.default_float_field_name] + else: + int64 = _vectors.int64[i] + float = _vectors.float[i] + if float is None and "float <=" in expr: + continue + if null_data_percent == 1 and "and float" in expr: + continue + if not expr or eval(expr): + filter_ids.append(_id) + + # 3. 
search with expression + vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] + search_res, _ = collection_w.search(vectors[:default_nq], default_search_field, + default_search_params, nb, + expr=expr, _async=_async, + check_task=CheckTasks.check_search_results, + check_items={"nq": default_nq, + "ids": insert_ids, + "limit": min(nb, len(filter_ids)), + "_async": _async}) + if _async: + search_res.done() + search_res = search_res.result() + filter_ids_set = set(filter_ids) + for hits in search_res: + ids = hits.ids + assert set(ids).issubset(filter_ids_set) + + # 4. search again with expression template + expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or") + expr_params = cf.get_expr_params_from_template(expressions[1]) + search_res, _ = collection_w.search(vectors[:default_nq], default_search_field, + default_search_params, nb, + expr=expr, expr_params=expr_params, _async=_async, + check_task=CheckTasks.check_search_results, + check_items={"nq": default_nq, + "ids": insert_ids, + "limit": min(nb, len(filter_ids)), + "_async": _async}) + if _async: + search_res.done() + search_res = search_res.result() + filter_ids_set = set(filter_ids) + for hits in search_res: + ids = hits.ids + assert set(ids).issubset(filter_ids_set) - filter_ids_set = set(filter_ids) - for hits in search_res: - ids = hits.ids - assert set(ids).issubset(filter_ids_set) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("bool_type", [True, False, "true", "false"]) @@ -3149,13 +3117,10 @@ def test_search_with_expression_bool(self, _async, bool_type, null_data_percent) dim = 64 auto_id = True enable_dynamic_field = False - collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb, - is_all_data_type=True, - auto_id=auto_id, - dim=dim, is_index=False, - enable_dynamic_field=enable_dynamic_field, - nullable_fields={ct.default_bool_field_name: null_data_percent})[0:4] - + collection_w, _vectors, _, insert_ids = \ + self.init_collection_general(prefix, True, nb, is_all_data_type=True, auto_id=auto_id, + dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field, + nullable_fields={ct.default_bool_field_name: null_data_percent})[0:4] # 2. create index and load vector_name_list = cf.extract_vector_field_name_list(collection_w) vector_name_list.append(ct.default_float_vec_field_name) @@ -3207,8 +3172,7 @@ def test_search_with_expression_bool(self, _async, bool_type, null_data_percent) assert set(ids).issubset(filter_ids_set) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("expression", cf.gen_array_field_expressions()) - def test_search_with_expression_array(self, expression, _async, null_data_percent): + def test_search_with_expression_array(self, null_data_percent): """ target: test search with different expressions method: test search with different expressions @@ -3223,14 +3187,14 @@ def test_search_with_expression_array(self, expression, _async, null_data_percen # 2. 
insert data array_length = 10 data = [] - for i in range(int(nb*(1-null_data_percent))): + for i in range(int(nb * (1 - null_data_percent))): arr = {ct.default_int64_field_name: i, ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0], ct.default_int32_array_field_name: [np.int32(i) for i in range(array_length)], ct.default_float_array_field_name: [np.float32(i) for i in range(array_length)], ct.default_string_array_field_name: [str(i) for i in range(array_length)]} data.append(arr) - for i in range(int(nb*(1-null_data_percent)), nb): + for i in range(int(nb * (1 - null_data_percent)), nb): arr = {ct.default_int64_field_name: i, ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0], ct.default_int32_array_field_name: [np.int32(i) for i in range(array_length)], @@ -3239,33 +3203,49 @@ def test_search_with_expression_array(self, expression, _async, null_data_percen data.append(arr) collection_w.insert(data) - # 3. filter result with expression in collection - expression = expression.replace("&&", "and").replace("||", "or") - filter_ids = [] - for i in range(nb): - int32_array = data[i][ct.default_int32_array_field_name] - float_array = data[i][ct.default_float_array_field_name] - string_array = data[i][ct.default_string_array_field_name] - if ct.default_string_array_field_name in expression and string_array is None: - continue - if not expression or eval(expression): - filter_ids.append(i) - - # 4. create index + # 3. create index collection_w.create_index("float_vector", ct.default_index) collection_w.load() - # 5. search with expression - log.info("test_search_with_expression: searching with expression: %s" % expression) - search_res, _ = collection_w.search(vectors[:default_nq], default_search_field, - default_search_params, nb, expression, _async=_async) - if _async: - search_res.done() - search_res = search_res.result() - - for hits in search_res: - ids = hits.ids - assert set(ids) == set(filter_ids) + # 4. filter result with expression in collection + for _async in [False, True]: + for expressions in cf.gen_array_field_expressions_and_templates(): + log.debug(f"search with expression: {expressions} with async={_async}") + expr = expressions[0].replace("&&", "and").replace("||", "or") + filter_ids = [] + for i in range(nb): + int32_array = data[i][ct.default_int32_array_field_name] + float_array = data[i][ct.default_float_array_field_name] + string_array = data[i][ct.default_string_array_field_name] + if ct.default_string_array_field_name in expr and string_array is None: + continue + if not expr or eval(expr): + filter_ids.append(i) + + # 5. search with expression + search_res, _ = collection_w.search(vectors[:default_nq], default_search_field, + default_search_params, limit=nb, + expr=expr, _async=_async) + if _async: + search_res.done() + search_res = search_res.result() + for hits in search_res: + ids = hits.ids + assert set(ids) == set(filter_ids) + + # 6. 
search again with expression template + expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or") + expr_params = cf.get_expr_params_from_template(expressions[1]) + search_res, _ = collection_w.search(vectors[:default_nq], default_search_field, + default_search_params, limit=nb, + expr=expr, expr_params=expr_params, + _async=_async) + if _async: + search_res.done() + search_res = search_res.result() + for hits in search_res: + ids = hits.ids + assert set(ids) == set(filter_ids) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("exists", ["exists"]) @@ -3316,9 +3296,7 @@ def test_search_with_expression_exists(self, exists, json_field_name, _async): "_async": _async}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason="issue 24514") - @pytest.mark.parametrize("expression", cf.gen_normal_expressions_field(default_float_field_name)) - def test_search_with_expression_auto_id(self, expression, _async): + def test_search_with_expression_auto_id(self, _async): """ target: test search with different expressions method: test search with different expressions with auto id @@ -3328,25 +3306,9 @@ def test_search_with_expression_auto_id(self, expression, _async): nb = 1000 dim = 64 enable_dynamic_field = True - collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb, - auto_id=True, - dim=dim, - is_index=False, - enable_dynamic_field=enable_dynamic_field)[0:4] - - # filter result with expression in collection - _vectors = _vectors[0] - expression = expression.replace("&&", "and").replace("||", "or") - filter_ids = [] - for i, _id in enumerate(insert_ids): - if enable_dynamic_field: - exec( - f"{default_float_field_name} = _vectors[i][f'{default_float_field_name}']") - else: - exec( - f"{default_float_field_name} = _vectors.{default_float_field_name}[i]") - if not expression or eval(expression): - filter_ids.append(_id) + collection_w, _vectors, _, insert_ids = \ + self.init_collection_general(prefix, True, nb, auto_id=True, dim=dim, + is_index=False, enable_dynamic_field=enable_dynamic_field)[0:4] # 2. create index index_param = {"index_type": "IVF_FLAT", @@ -3354,27 +3316,60 @@ def test_search_with_expression_auto_id(self, expression, _async): collection_w.create_index("float_vector", index_param) collection_w.load() - # 3. 
search with different expressions - log.info( - "test_search_with_expression_auto_id: searching with expression: %s" % expression) - vectors = [[random.random() for _ in range(dim)] - for _ in range(default_nq)] - search_res, _ = collection_w.search(vectors[:default_nq], default_search_field, - default_search_params, nb, expression, - _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": min(nb, len(filter_ids)), - "_async": _async}) - if _async: - search_res.done() - search_res = search_res.result() - - filter_ids_set = set(filter_ids) - for hits in search_res: - ids = hits.ids - assert set(ids).issubset(filter_ids_set) + # filter result with expression in collection + search_vectors = [[random.random() for _ in range(dim)] + for _ in range(default_nq)] + _vectors = _vectors[0] + for expressions in cf.gen_normal_expressions_and_templates_field(default_float_field_name): + log.debug(f"search with expression: {expressions}") + expr = expressions[0].replace("&&", "and").replace("||", "or") + filter_ids = [] + for i, _id in enumerate(insert_ids): + if enable_dynamic_field: + exec( + f"{default_float_field_name} = _vectors[i][f'{default_float_field_name}']") + else: + exec( + f"{default_float_field_name} = _vectors.{default_float_field_name}[i]") + if not expr or eval(expr): + filter_ids.append(_id) + # 3. search expressions + search_res, _ = collection_w.search(search_vectors[:default_nq], default_search_field, + default_search_params, + limit=nb, expr=expr, + _async=_async, + check_task=CheckTasks.check_search_results, + check_items={"nq": default_nq, + "ids": insert_ids, + "limit": min(nb, len(filter_ids)), + "_async": _async}) + if _async: + search_res.done() + search_res = search_res.result() + filter_ids_set = set(filter_ids) + for hits in search_res: + ids = hits.ids + assert set(ids).issubset(filter_ids_set) + + # search again with expression template + expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or") + expr_params = cf.get_expr_params_from_template(expressions[1]) + search_res, _ = collection_w.search(search_vectors[:default_nq], default_search_field, + default_search_params, + limit=nb, expr=expr, expr_params=expr_params, + _async=_async, + check_task=CheckTasks.check_search_results, + check_items={"nq": default_nq, + "ids": insert_ids, + "limit": min(nb, len(filter_ids)), + "_async": _async}) + if _async: + search_res.done() + search_res = search_res.result() + filter_ids_set = set(filter_ids) + for hits in search_res: + ids = hits.ids + assert set(ids).issubset(filter_ids_set) @pytest.mark.tags(CaseLabel.L2) def test_search_expression_all_data_type(self, nq, _async, null_data_percent): @@ -3394,12 +3389,10 @@ def test_search_expression_all_data_type(self, nq, _async, null_data_percent): ct.default_float_field_name: null_data_percent, ct.default_double_field_name: null_data_percent, ct.default_string_field_name: null_data_percent} - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb, - is_all_data_type=True, - auto_id=auto_id, - dim=dim, - multiple_dim_array=[dim, dim], - nullable_fields=nullable_fields)[0:4] + collection_w, _, _, insert_ids = \ + self.init_collection_general(prefix, True, nb, is_all_data_type=True, + auto_id=auto_id, dim=dim, multiple_dim_array=[dim, dim], + nullable_fields=nullable_fields)[0:4] # 2. 
search log.info("test_search_expression_all_data_type: Searching collection %s" % collection_w.name) @@ -3445,7 +3438,7 @@ def test_search_expression_different_data_type(self, field, null_data_percent): nullable_fields = {field: null_data_percent} default_schema = cf.gen_collection_schema_all_datatype(nullable_fields=nullable_fields) collection_w = self.init_collection_wrap(schema=default_schema) - collection_w = cf.insert_data(collection_w, is_all_data_type=True, insert_offset=offset-1000, + collection_w = cf.insert_data(collection_w, is_all_data_type=True, insert_offset=offset - 1000, nullable_fields=nullable_fields)[0] # 2. create index and load @@ -3956,26 +3949,26 @@ def test_search_with_output_field_wildcard(self, wildcard_output_fields, _async) "output_fields": output_fields}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("invalid_output_fields", [["%"], [""], ["-"]]) - def test_search_with_invalid_output_fields(self, invalid_output_fields): + def test_search_with_invalid_output_fields(self): """ target: test search with output fields using wildcard method: search with one output_field (wildcard) expected: search success """ # 1. initialize with data + invalid_output_fields = [["%"], [""], ["-"]] auto_id = False collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, auto_id=auto_id)[0:4] # 2. search - log.info("test_search_with_output_field_wildcard: Searching collection %s" % collection_w.name) - error1 = {"err_code": 65535, "err_msg": "field %s not exist" % invalid_output_fields[0]} - error2 = {"err_code": 1, "err_msg": "`output_fields` value %s is illegal" % invalid_output_fields[0]} - error = error2 if invalid_output_fields == [""] else error1 - collection_w.search(vectors[:default_nq], default_search_field, - default_search_params, default_limit, - default_search_exp, - output_fields=invalid_output_fields, - check_task=CheckTasks.err_res, check_items=error) + for field in invalid_output_fields: + error1 = {ct.err_code: 999, ct.err_msg: "field %s not exist" % field[0]} + error2 = {ct.err_code: 999, ct.err_msg: "`output_fields` value %s is illegal" % field} + error = error2 if field == [""] else error1 + collection_w.search(vectors[:default_nq], default_search_field, + default_search_params, default_limit, + default_search_exp, + output_fields=field, + check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) def test_search_multi_collections(self, nq, _async): @@ -4022,11 +4015,11 @@ def test_search_concurrent_multi_threads(self, nq, _async, null_data_percent): enable_dynamic_field = False threads_num = 10 threads = [] - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb, - auto_id=auto_id, dim=dim, - enable_dynamic_field=enable_dynamic_field, - nullable_fields={ct.default_string_field_name: - null_data_percent})[0:4] + collection_w, _, _, insert_ids = \ + self.init_collection_general(prefix, True, nb, auto_id=auto_id, dim=dim, + enable_dynamic_field=enable_dynamic_field, + nullable_fields={ct.default_string_field_name: null_data_percent})[0:4] + def search(collection_w): vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] @@ -4064,14 +4057,14 @@ def test_search_concurrent_two_collections_nullable(self, nq, _async): enable_dynamic_field = False threads_num = 10 threads = [] - collection_w_1, _, _, insert_ids = self.init_collection_general(prefix, False, nb, - auto_id=True, dim=dim, - enable_dynamic_field=enable_dynamic_field, - 
nullable_fields={ct.default_json_field_name:1})[0:4] - collection_w_2, _, _, insert_ids = self.init_collection_general(prefix, False, nb, - auto_id=True, dim=dim, - enable_dynamic_field=enable_dynamic_field, - nullable_fields={ct.default_json_field_name: 1})[0:4] + collection_w_1, _, _, insert_ids = \ + self.init_collection_general(prefix, False, nb, auto_id=True, dim=dim, + enable_dynamic_field=enable_dynamic_field, + nullable_fields={ct.default_json_field_name: 1})[0:4] + collection_w_2, _, _, insert_ids = \ + self.init_collection_general(prefix, False, nb, auto_id=True, dim=dim, + enable_dynamic_field=enable_dynamic_field, + nullable_fields={ct.default_json_field_name: 1})[0:4] collection_w_1.release() collection_w_2.release() # insert data @@ -4195,11 +4188,10 @@ def test_search_with_expression_large(self): nb = 10000 dim = 64 enable_dynamic_field = True - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, - nb, dim=dim, - is_index=False, - enable_dynamic_field=enable_dynamic_field, - with_json=False)[0:4] + collection_w, _, _, insert_ids = \ + self.init_collection_general(prefix, True, nb, dim=dim, is_index=False, + enable_dynamic_field=enable_dynamic_field, + with_json=False)[0:4] # 2. create index index_param = {"index_type": "IVF_FLAT", "metric_type": "COSINE", "params": {"nlist": 100}} @@ -4230,11 +4222,10 @@ def test_search_with_expression_large_two(self): nb = 10000 dim = 64 enable_dynamic_field = True - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, - nb, dim=dim, - is_index=False, - enable_dynamic_field=enable_dynamic_field, - with_json=False)[0:4] + collection_w, _, _, insert_ids = \ + self.init_collection_general(prefix, True, nb, dim=dim, is_index=False, + enable_dynamic_field=enable_dynamic_field, + with_json=False)[0:4] # 2. create index index_param = {"index_type": "IVF_FLAT", "metric_type": "COSINE", "params": {"nlist": 100}} @@ -4243,7 +4234,7 @@ def test_search_with_expression_large_two(self): nums = 5000 vectors = [[random.random() for _ in range(dim)] for _ in range(nums)] - vectors_id = [random.randint(0, nums)for _ in range(nums)] + vectors_id = [random.randint(0, nums) for _ in range(nums)] expression = f"{default_int64_field_name} in {vectors_id}" search_res, _ = collection_w.search(vectors, default_search_field, default_search_params, default_limit, expression, @@ -4268,10 +4259,9 @@ def test_search_with_consistency_bounded(self, nq, _async): dim = 64 auto_id = True enable_dynamic_field = False - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old, - auto_id=auto_id, - dim=dim, - enable_dynamic_field=enable_dynamic_field)[0:4] + collection_w, _, _, insert_ids = \ + self.init_collection_general(prefix, True, nb_old, auto_id=auto_id, + dim=dim, enable_dynamic_field=enable_dynamic_field)[0:4] # 2. search for original data after load vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] collection_w.search(vectors[:nq], default_search_field, @@ -4317,8 +4307,7 @@ def test_search_with_consistency_strong(self, nq, _async): auto_id = False enable_dynamic_field = False collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old, - auto_id=auto_id, - dim=dim, + auto_id=auto_id, dim=dim, enable_dynamic_field=enable_dynamic_field)[0:4] # 2. 
search for original data after load vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] @@ -4366,8 +4355,7 @@ def test_search_with_consistency_eventually(self, nq, _async): auto_id = True enable_dynamic_field = True collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old, - auto_id=auto_id, - dim=dim, + auto_id=auto_id, dim=dim, enable_dynamic_field=enable_dynamic_field)[0:4] # 2. search for original data after load vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] @@ -4409,8 +4397,7 @@ def test_search_with_consistency_session(self, nq, _async): auto_id = False enable_dynamic_field = True collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old, - auto_id=auto_id, - dim=dim, + auto_id=auto_id, dim=dim, enable_dynamic_field=enable_dynamic_field)[0:4] # 2. search for original data after load vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] @@ -4569,8 +4556,7 @@ def test_search_partition_naming_rules_without_index(self, nq, partition_name): enable_dynamic_field = False self._connect() collection_w, _, _, insert_ids = self.init_collection_general(prefix, False, nb, - auto_id=auto_id, - dim=dim, + auto_id=auto_id, dim=dim, enable_dynamic_field=enable_dynamic_field)[0:4] collection_w.create_partition(partition_name) insert_ids = cf.insert_data(collection_w, nb, auto_id=auto_id, dim=dim, @@ -4604,8 +4590,7 @@ def test_search_partition_naming_rules_with_index(self, nq, partition_name, inde auto_id = False enable_dynamic_field = True self._connect() - collection_w, _, _, insert_ids = self.init_collection_general(prefix, False, nb, - auto_id=auto_id, + collection_w, _, _, insert_ids = self.init_collection_general(prefix, False, nb, auto_id=auto_id, dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:4] collection_w.create_partition(partition_name) @@ -4765,7 +4750,7 @@ def test_search_repeatedly_ivf_index_different_limit(self, index): @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("metrics", ct.binary_metrics[:2]) @pytest.mark.parametrize("index", ["BIN_FLAT", "BIN_IVF_FLAT"]) - @pytest.mark.parametrize("dim", [32768, 65536, ct.max_binary_vector_dim-8, ct.max_binary_vector_dim]) + @pytest.mark.parametrize("dim", [32768, 65536, ct.max_binary_vector_dim - 8, ct.max_binary_vector_dim]) def test_binary_indexed_large_dim_vectors_search(self, dim, metrics, index): """ target: binary vector large dim search @@ -4922,7 +4907,7 @@ def test_search_flat_top_k(self, get_nq): collection_w.search(vectors[:nq], default_search_field, default_search_params, top_k, check_task=CheckTasks.err_res, check_items={"err_code": 65535, - "err_msg": f"topk [{top_k}] is invalid, top k should be in range" + "err_msg": f"topk [{top_k}] is invalid, it should be in range" f" [1, 16384], but got {top_k}"}) @pytest.mark.tags(CaseLabel.L2) @@ -5063,10 +5048,9 @@ def test_search_ip_brute_force(self, nb, dim): top_k = 1 # 1. 
initialize with data - collection_w, insert_entities, _, insert_ids, _ = self.init_collection_general(prefix, True, nb, - is_binary=False, - is_index=False, - dim=dim)[0:5] + collection_w, insert_entities, _, insert_ids, _ = \ + self.init_collection_general(prefix, True, nb, is_binary=False, + is_index=False, dim=dim)[0:5] flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "IP"} collection_w.create_index(ct.default_float_vec_field_name, flat_index) insert_vectors = insert_entities[0][default_search_field].tolist() @@ -5297,7 +5281,7 @@ def test_search_without_connect(self): default_search_exp, check_task=CheckTasks.err_res, check_items={"err_code": 1, - "err_msg": "'should create connect first.'"}) + "err_msg": "should create connection first"}) @pytest.mark.tags(CaseLabel.L2) # @pytest.mark.timeout(300) @@ -5460,12 +5444,14 @@ def test_search_array_with_inverted_index(self, array_element_data_type): additional_params = {"max_length": 1000} if array_element_data_type == DataType.VARCHAR else {} fields = [ FieldSchema(name="id", dtype=DataType.INT64, is_primary=True), - FieldSchema(name="contains", dtype=DataType.ARRAY, element_type=array_element_data_type, max_capacity=2000, **additional_params), + FieldSchema(name="contains", dtype=DataType.ARRAY, element_type=array_element_data_type, max_capacity=2000, + **additional_params), FieldSchema(name="contains_any", dtype=DataType.ARRAY, element_type=array_element_data_type, max_capacity=2000, **additional_params), FieldSchema(name="contains_all", dtype=DataType.ARRAY, element_type=array_element_data_type, max_capacity=2000, **additional_params), - FieldSchema(name="equals", dtype=DataType.ARRAY, element_type=array_element_data_type, max_capacity=2000, **additional_params), + FieldSchema(name="equals", dtype=DataType.ARRAY, element_type=array_element_data_type, max_capacity=2000, + **additional_params), FieldSchema(name="array_length_field", dtype=DataType.ARRAY, element_type=array_element_data_type, max_capacity=2000, **additional_params), FieldSchema(name="array_access", dtype=DataType.ARRAY, element_type=array_element_data_type, @@ -5487,7 +5473,7 @@ def test_search_array_with_inverted_index(self, array_element_data_type): expr = item["expr"] ground_truth_candidate = item["ground_truth"] res, _ = collection_w.search( - data = [np.array([random.random() for j in range(128)], dtype=np.dtype("float32"))], + data=[np.array([random.random() for j in range(128)], dtype=np.dtype("float32"))], anns_field="emb", param={"metric_type": "L2", "params": {"M": 32, "efConstruction": 360}}, limit=10, @@ -5502,7 +5488,6 @@ def test_search_array_with_inverted_index(self, array_element_data_type): class TestSearchString(TestcaseBase): - """ ****************************************************************** The following cases are used to test search about string @@ -5752,10 +5737,9 @@ def test_search_with_different_string_expr(self, expression, _async): dim = 64 nb = 1000 enable_dynamic_field = True - collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, - nb, dim=dim, - is_index=False, - enable_dynamic_field=enable_dynamic_field)[0:4] + collection_w, _vectors, _, insert_ids = \ + self.init_collection_general(prefix, True, nb, dim=dim, + is_index=False, enable_dynamic_field=enable_dynamic_field)[0:4] # filter result with expression in collection _vectors = _vectors[0] @@ -5807,11 +5791,9 @@ def test_search_string_field_is_primary_binary(self, _async): """ dim = 64 # 1. 
initialize with binary data - collection_w, _, binary_raw_vector, insert_ids = self.init_collection_general(prefix, True, 2, - is_binary=True, - dim=dim, - is_index=False, - primary_field=ct.default_string_field_name)[0:4] + collection_w, _, binary_raw_vector, insert_ids = \ + self.init_collection_general(prefix, True, 2, is_binary=True, dim=dim, + is_index=False, primary_field=ct.default_string_field_name)[0:4] # 2. create index default_index = {"index_type": "BIN_IVF_FLAT", "params": {"nlist": 128}, "metric_type": "JACCARD"} @@ -5839,14 +5821,12 @@ def test_search_string_field_binary(self, _async): collection search uses string expr in string field, string field is not primary expected: Search successfully """ - # 1. initialize with binary data - dim = 128 - auto_id = True - collection_w, _, binary_raw_vector, insert_ids = self.init_collection_general(prefix, True, 2, - is_binary=True, - auto_id=auto_id, - dim=dim, - is_index=False)[0:4] + # 1. initialize with binary data + dim = 128 + auto_id = True + collection_w, _, binary_raw_vector, insert_ids = \ + self.init_collection_general(prefix, True, 2, is_binary=True, auto_id=auto_id, + dim=dim, is_index=False)[0:4] # 2. create index default_index = {"index_type": "BIN_IVF_FLAT", "params": {"nlist": 128}, "metric_type": "JACCARD"} @@ -6013,7 +5993,7 @@ def test_search_all_index_with_compare_expr(self, _async): vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)] output_fields = [default_int64_field_name, - default_float_field_name, default_string_field_name] + default_float_field_name, default_string_field_name] collection_w.search(vectors[:default_nq], default_search_field, default_search_params, default_limit, expr, @@ -6041,7 +6021,7 @@ def test_search_string_field_is_primary_insert_empty(self, _async): nb = 3000 data = cf.gen_default_list_data(nb) - data[2] = [""for _ in range(nb)] + data[2] = ["" for _ in range(nb)] collection_w.insert(data=data) collection_w.load() @@ -6082,7 +6062,7 @@ def test_search_string_field_not_primary_is_empty(self, _async): nb = 3000 data = cf.gen_default_list_data(nb) insert_ids = data[0] - data[2] = [""for _ in range(nb)] + data[2] = ["" for _ in range(nb)] collection_w.insert(data) assert collection_w.num_entities == nb @@ -6212,7 +6192,7 @@ def test_search_with_pagination(self, offset, limit, _async): "_async": _async})[0] # 3. 
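# The pagination tests here rely on one invariant: a paged search equals the
# tail of a single larger search; a standalone sketch of that check
# (hypothetical helper name, offset passed inside the search params):
def check_pagination_tail(collection, vectors, anns_field, limit, offset):
    paged_params = {"metric_type": "COSINE", "params": {"nprobe": 10}, "offset": offset}
    full_params = {"metric_type": "COSINE", "params": {"nprobe": 10}}
    paged = collection.search(vectors, anns_field, paged_params, limit)[0]
    full = collection.search(vectors, anns_field, full_params, limit + offset)[0]
    assert set(paged.ids) == set(full.ids[offset:])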
search with offset+limit res = collection_w.search(vectors[:default_nq], default_search_field, default_search_params, - limit+offset, default_search_exp, _async=_async)[0] + limit + offset, default_search_exp, _async=_async)[0] if _async: search_res.done() search_res = search_res.result() @@ -6375,8 +6355,7 @@ def test_search_with_pagination_topK(self, limit, _async): assert set(search_res[0].ids) == set(res[0].ids[offset:]) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("expression", cf.gen_normal_expressions()) - def test_search_pagination_with_expression(self, offset, expression, _async): + def test_search_pagination_with_expression(self, offset): """ target: test search pagination with expression method: create connection, collection, insert and search with expression @@ -6386,56 +6365,87 @@ def test_search_pagination_with_expression(self, offset, expression, _async): nb = 2500 dim = 38 enable_dynamic_field = False - collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb=nb, - dim=dim, - enable_dynamic_field=enable_dynamic_field)[0:4] + collection_w, _vectors, _, insert_ids = \ + self.init_collection_general(prefix, True, nb=nb, dim=dim, + enable_dynamic_field=enable_dynamic_field)[0:4] + collection_w.load() # filter result with expression in collection _vectors = _vectors[0] - expression = expression.replace("&&", "and").replace("||", "or") - filter_ids = [] - for i, _id in enumerate(insert_ids): - if enable_dynamic_field: - int64 = _vectors[i][ct.default_int64_field_name] - float = _vectors[i][ct.default_float_field_name] - else: - int64 = _vectors.int64[i] - float = _vectors.float[i] - if not expression or eval(expression): - filter_ids.append(_id) - # 2. search - collection_w.load() - limit = min(default_limit, len(filter_ids)) - if offset >= len(filter_ids): - limit = 0 - elif len(filter_ids) - offset < default_limit: - limit = len(filter_ids) - offset - search_param = {"metric_type": "COSINE", - "params": {"nprobe": 10}, "offset": offset} - vectors = [[random.random() for _ in range(dim)] - for _ in range(default_nq)] - search_res, _ = collection_w.search(vectors[:default_nq], default_search_field, - search_param, default_limit, expression, - _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": limit, - "_async": _async}) - # 3. search with offset+limit - res = collection_w.search(vectors[:default_nq], default_search_field, default_search_params, - default_limit + offset, expression, _async=_async)[0] - if _async: - res.done() - res = res.result() - search_res.done() - search_res = search_res.result() - filter_ids_set = set(filter_ids) - for hits in search_res: - ids = hits.ids - assert set(ids).issubset(filter_ids_set) - res_distance = res[0].distances[offset:] - # assert sorted(search_res[0].distances, key=numpy.float32) == sorted(res_distance, key=numpy.float32) - assert set(search_res[0].ids) == set(res[0].ids[offset:]) + for _async in [False, True]: + for expressions in cf.gen_normal_expressions_and_templates(): + log.debug(f"search with expression: {expressions} with _async: {_async}") + expr = expressions[0].replace("&&", "and").replace("||", "or") + filter_ids = [] + for i, _id in enumerate(insert_ids): + if enable_dynamic_field: + int64 = _vectors[i][ct.default_int64_field_name] + float = _vectors[i][ct.default_float_field_name] + else: + int64 = _vectors.int64[i] + float = _vectors.float[i] + if not expr or eval(expr): + filter_ids.append(_id) + # 2. 
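# The filter baseline above replays the boolean expression locally via eval();
# a compact standalone sketch of that idea, keeping the same deliberate
# shadowing of int64/float that the tests use (illustrative names):
def local_filter_ids(expr, insert_ids, int64_values, float_values):
    expr = expr.replace("&&", "and").replace("||", "or")
    kept = []
    for _id, int64, float in zip(insert_ids, int64_values, float_values):
        if not expr or eval(expr):  # expr references the local names int64 / float
            kept.append(_id)
    return kept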
search + limit = min(default_limit, len(filter_ids)) + if offset >= len(filter_ids): + limit = 0 + elif len(filter_ids) - offset < default_limit: + limit = len(filter_ids) - offset + search_param = {"metric_type": "COSINE", + "params": {"nprobe": 10}, "offset": offset} + vectors = [[random.random() for _ in range(dim)] + for _ in range(default_nq)] + search_res, _ = collection_w.search(vectors[:default_nq], default_search_field, + search_param, default_limit, + expr=expr, + _async=_async, + check_task=CheckTasks.check_search_results, + check_items={"nq": default_nq, + "ids": insert_ids, + "limit": limit, + "_async": _async}) + # 3. search with offset+limit + res = collection_w.search(vectors[:default_nq], default_search_field, default_search_params, + default_limit + offset, + expr=expr, _async=_async)[0] + if _async: + res.done() + res = res.result() + search_res.done() + search_res = search_res.result() + filter_ids_set = set(filter_ids) + for hits in search_res: + ids = hits.ids + assert set(ids).issubset(filter_ids_set) + assert set(search_res[0].ids) == set(res[0].ids[offset:]) + + # 4. search again with expression template + expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or") + expr_params = cf.get_expr_params_from_template(expressions[1]) + search_res, _ = collection_w.search(vectors[:default_nq], default_search_field, + search_param, default_limit, + expr=expr, expr_params=expr_params, + _async=_async, + check_task=CheckTasks.check_search_results, + check_items={"nq": default_nq, + "ids": insert_ids, + "limit": limit, + "_async": _async}) + # 3. search with offset+limit + res = collection_w.search(vectors[:default_nq], default_search_field, default_search_params, + default_limit + offset, + expr=expr, expr_params=expr_params, _async=_async)[0] + if _async: + res.done() + res = res.result() + search_res.done() + search_res = search_res.result() + filter_ids_set = set(filter_ids) + for hits in search_res: + ids = hits.ids + assert set(ids).issubset(filter_ids_set) + assert set(search_res[0].ids) == set(res[0].ids[offset:]) + @pytest.mark.tags(CaseLabel.L2) def test_search_pagination_with_index_partition(self, offset, _async): @@ -6693,7 +6703,7 @@ def test_search_sparse_with_pagination(self, offset): auto_id = False collection_w, _, _, insert_ids = \ self.init_collection_general( - prefix, True, auto_id=auto_id, vector_data_type=ct.sparse_vector)[0:4] + prefix, True, auto_id=auto_id, vector_data_type=ct.sparse_vector)[0:4] # 2. 
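# get_expr_from_template / get_expr_params_from_template unpack a template
# entry above; assuming the entry is a dict carrying the parameterized filter
# and its bound values under "expr" and "expr_params", minimal sketches of the
# two accessors would look like:
def get_expr_from_template(template: dict) -> str:
    return template.get("expr", "")

def get_expr_params_from_template(template: dict) -> dict:
    return template.get("expr_params", {})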
search with offset+limit search_param = {"metric_type": "IP", "params": {"drop_ratio_search": "0.2"}, "offset": offset} search_vectors = cf.gen_default_list_sparse_data()[-1][-2:] @@ -6742,7 +6752,7 @@ def test_search_pagination_with_invalid_offset_type(self, offset): default_search_exp, check_task=CheckTasks.err_res, check_items={"err_code": 1, - "err_msg": "offset [%s] is invalid" % offset}) + "err_msg": "wrong type for offset, expect int"}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("offset", [-1, 16385]) @@ -6762,8 +6772,7 @@ def test_search_pagination_with_invalid_offset_value(self, offset): default_search_exp, check_task=CheckTasks.err_res, check_items={"err_code": 65535, - "err_msg": "offset [%d] is invalid, should be in range " - "[1, 16384], but got %d" % (offset, offset)}) + "err_msg": f"offset [{offset}] is invalid, it should be in range [1, 16384]"}) class TestSearchDiskann(TestcaseBase): @@ -6772,6 +6781,7 @@ class TestSearchDiskann(TestcaseBase): The following cases are used to test search about diskann index ****************************************************************** """ + @pytest.fixture(scope="function", params=[32, 128]) def dim(self, request): yield request.param @@ -6803,8 +6813,7 @@ def test_search_with_diskann_index(self, _async): enable_dynamic_field = True nb = 2000 collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, auto_id=auto_id, - nb=nb, dim=dim, - is_index=False, + nb=nb, dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:4] # 2. create index @@ -6819,7 +6828,7 @@ def test_search_with_diskann_index(self, _async): vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] output_fields = [default_int64_field_name, - default_float_field_name, default_string_field_name] + default_float_field_name, default_string_field_name] collection_w.search(vectors[:default_nq], default_search_field, default_search_params, default_limit, default_search_exp, @@ -6865,41 +6874,7 @@ def test_search_with_limit_20(self, _async, search_list): "_async": _async}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("limit", [1]) - @pytest.mark.parametrize("search_list", [-1, 0]) - def test_search_invalid_params_with_diskann_A(self, search_list, limit): - """ - target: test delete after creating index - method: 1.create collection , insert data, primary_field is int field - 2.create diskann index - 3.search with invalid params, where topk <=20, search list [topk, 2147483647] - expected: search report an error - """ - # 1. initialize with data - dim = 90 - auto_id = False - collection_w, _, _, insert_ids = \ - self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=False)[0:4] - # 2. 
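# Sparse queries above are plain {dimension_index: weight} dicts; a minimal
# sketch of a paged sparse search (hypothetical collection handle), where
# drop_ratio_search prunes the smallest query weights before searching:
def sparse_page(collection, offset, limit=10):
    params = {"metric_type": "IP",
              "params": {"drop_ratio_search": "0.2"},
              "offset": offset}
    query = [{17: 0.34, 402: 1.12, 9981: 0.71}]  # {dim_index: weight}
    return collection.search(query, "sparse_vector", params, limit)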
create index - default_index = {"index_type": "DISKANN", "metric_type": "L2", "params": {}} - collection_w.create_index(ct.default_float_vec_field_name, default_index) - collection_w.load() - default_search_params = {"metric_type": "L2", "params": {"search_list": search_list}} - vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] - output_fields = [default_int64_field_name, - default_float_field_name, default_string_field_name] - collection_w.search(vectors[:default_nq], default_search_field, - default_search_params, limit, - default_search_exp, - output_fields=output_fields, - check_task=CheckTasks.err_res, - check_items={"err_code": 65535, - "err_msg": "param search_list_size out of range [ 1,2147483647 ]"}) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("limit", [20]) - @pytest.mark.parametrize("search_list", [19]) - def test_search_invalid_params_with_diskann_B(self, search_list, limit): + def test_search_invalid_params_with_diskann_B(self): """ target: test delete after creating index method: 1.create collection , insert data, primary_field is int field @@ -6909,6 +6884,7 @@ def test_search_invalid_params_with_diskann_B(self, search_list, limit): """ # 1. initialize with data dim = 100 + limit = 20 auto_id = True collection_w, _, _, insert_ids = \ self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=False)[0:4] @@ -6916,16 +6892,16 @@ def test_search_invalid_params_with_diskann_B(self, search_list, limit): default_index = {"index_type": "DISKANN", "metric_type": "L2", "params": {}} collection_w.create_index(ct.default_float_vec_field_name, default_index) collection_w.load() - default_search_params = {"metric_type": "L2", "params": {"search_list": search_list}} + default_search_params = {"metric_type": "L2", "params": {"search_list": limit-1}} vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] - output_fields = [default_int64_field_name, default_float_field_name, default_string_field_name] + output_fields = [default_int64_field_name, default_float_field_name, default_string_field_name] collection_w.search(vectors[:default_nq], default_search_field, default_search_params, limit, default_search_exp, output_fields=output_fields, check_task=CheckTasks.err_res, - check_items={"err_code": 65535, - "err_msg": "UnknownError"}) + check_items={"err_code": 999, + "err_msg": f"should be larger than k({limit})"}) @pytest.mark.tags(CaseLabel.L2) def test_search_with_diskann_with_string_pk(self): @@ -6955,7 +6931,7 @@ def test_search_with_diskann_with_string_pk(self): vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] output_fields = [default_int64_field_name, - default_float_field_name, default_string_field_name] + default_float_field_name, default_string_field_name] collection_w.search(vectors[:default_nq], default_search_field, default_search_params, default_limit, default_search_exp, @@ -7002,7 +6978,7 @@ def test_search_with_delete_data(self, _async): vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] output_fields = [default_int64_field_name, - default_float_field_name, default_string_field_name] + default_float_field_name, default_string_field_name] collection_w.search(vectors[:default_nq], default_search_field, default_search_params, default_limit, default_search_exp, @@ -7053,7 +7029,7 @@ def test_search_with_diskann_and_more_index(self, _async): collection_w.delete(tmp_expr) default_search_params = {"metric_type": "COSINE", "params": {"search_list": 
30}} vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] - output_fields = [default_int64_field_name, default_float_field_name, default_string_field_name] + output_fields = [default_int64_field_name, default_float_field_name, default_string_field_name] collection_w.search(vectors[:default_nq], default_search_field, default_search_params, default_limit, default_search_exp, @@ -7098,9 +7074,9 @@ def test_search_with_scalar_field(self, _async): default_expr = "int64 in [1, 2, 3, 4]" limit = 4 default_search_params = {"metric_type": "COSINE", "params": {"nprobe": 64}} - vectors = [[random.random() for _ in range(dim)]for _ in range(default_nq)] + vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] output_fields = [default_int64_field_name, - default_float_field_name, default_string_field_name] + default_float_field_name, default_string_field_name] search_res = collection_w.search(vectors[:default_nq], default_search_field, default_search_params, limit, default_expr, output_fields=output_fields, _async=_async, @@ -7123,7 +7099,7 @@ def test_search_diskann_search_list_equal_to_limit(self, limit, _async): # 1. initialize with data dim = 77 auto_id = False - enable_dynamic_field= False + enable_dynamic_field = False collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:4] @@ -7255,6 +7231,7 @@ def null_data_percent(self, request): # The followings are valid range search cases ****************************************************************** """ + @pytest.mark.tags(CaseLabel.L0) @pytest.mark.parametrize("vector_data_type", ct.all_dense_vector_types) @pytest.mark.parametrize("with_growing", [False, True]) @@ -7274,7 +7251,7 @@ def test_range_search_default(self, index_type, metric, vector_data_type, with_g rounds = 10 for i in range(rounds): data = cf.gen_default_list_data(nb=nb, auto_id=True, vector_data_type=vector_data_type, - with_json=False, start=i*nb) + with_json=False, start=i * nb) collection_w.insert(data) collection_w.flush() @@ -7284,9 +7261,9 @@ def test_range_search_default(self, index_type, metric, vector_data_type, with_g if with_growing is True: # add some growing segments - for j in range(rounds//2): + for j in range(rounds // 2): data = cf.gen_default_list_data(nb=nb, auto_id=True, vector_data_type=vector_data_type, - with_json=False, start=(rounds+j)*nb) + with_json=False, start=(rounds + j) * nb) collection_w.insert(data) search_params = {"params": {}} @@ -7313,7 +7290,7 @@ def test_range_search_default(self, index_type, metric, vector_data_type, with_g params = cf.get_search_params_params(index_type) params.update({"radius": radius, "range_filter": range_filter}) if index_type == "HNSW": - params.update({"ef": check_topk+100}) + params.update({"ef": check_topk + 100}) if index_type == "IVF_PQ": params.update({"max_empty_result_buckets": 100}) range_search_params = {"params": params} @@ -7323,14 +7300,16 @@ def test_range_search_default(self, index_type, metric, vector_data_type, with_g # assert len(range_ids) == check_topk log.debug(f"range search radius={radius}, range_filter={range_filter}, range results num: {len(range_ids)}") hit_rate = round(len(set(ids).intersection(set(range_ids))) / len(set(ids)), 2) - log.debug(f"{vector_data_type} range search results {index_type} {metric} with_growing {with_growing} hit_rate: {hit_rate}") - assert hit_rate >= 0.2 # issue #32630 to improve the accuracy + log.debug( + 
f"{vector_data_type} range search results {index_type} {metric} with_growing {with_growing} hit_rate: {hit_rate}") + assert hit_rate >= 0.2 # issue #32630 to improve the accuracy @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("range_filter", [1000, 1000.0]) @pytest.mark.parametrize("radius", [0, 0.0]) @pytest.mark.skip() - def test_range_search_multi_vector_fields(self, nq, dim, auto_id, is_flush, radius, range_filter, enable_dynamic_field): + def test_range_search_multi_vector_fields(self, nq, dim, auto_id, is_flush, radius, range_filter, + enable_dynamic_field): """ target: test range search normal case method: create connection, collection, insert and search @@ -7354,7 +7333,7 @@ def test_range_search_multi_vector_fields(self, nq, dim, auto_id, is_flush, radi # 3. range search range_search_params = {"metric_type": "COSINE", "params": {"radius": radius, "range_filter": range_filter}} - vector_list = cf. extract_vector_field_name_list(collection_w) + vector_list = cf.extract_vector_field_name_list(collection_w) vector_list.append(default_search_field) for search_field in vector_list: search_res = collection_w.search(vectors[:nq], search_field, @@ -7561,14 +7540,14 @@ def test_accurate_range_search_with_multi_segments(self): # 4. get inserted original data inserted_vectors = collection_w.query(expr="int64 >= 0", output_fields=[ - ct.default_float_vec_field_name]) + ct.default_float_vec_field_name]) original_vectors = [] for single in inserted_vectors[0]: single_vector = single[ct.default_float_vec_field_name] original_vectors.append(single_vector) # 5. Calculate the searched ids - limit = 2*nb + limit = 2 * nb vectors = [[random.random() for _ in range(dim)] for _ in range(1)] distances = [] for original_vector in original_vectors: @@ -7683,10 +7662,9 @@ def test_range_search_collection_after_release_load(self, _async): # 1. initialize without data auto_id = True enable_dynamic_field = False - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, default_nb, - 1, auto_id=auto_id, - dim=default_dim, - enable_dynamic_field=enable_dynamic_field)[0:5] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, default_nb, 1, auto_id=auto_id, + dim=default_dim, enable_dynamic_field=enable_dynamic_field)[0:5] # 2. release collection log.info("test_range_search_collection_after_release_load: releasing collection %s" % collection_w.name) @@ -7764,9 +7742,9 @@ def test_range_search_new_data(self, nq): nb_old = 500 dim = 111 enable_dynamic_field = False - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nb_old, - dim=dim, - enable_dynamic_field=enable_dynamic_field)[0:5] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, nb_old, dim=dim, + enable_dynamic_field=enable_dynamic_field)[0:5] # 2. search for original data after load vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] range_search_params = {"metric_type": "COSINE", "params": {"radius": 0, @@ -7886,10 +7864,9 @@ def test_range_search_after_different_index_with_params(self, index): # 1. 
initialize with data dim = 96 enable_dynamic_field = False - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, - partition_num=1, - dim=dim, is_index=False, - enable_dynamic_field=enable_dynamic_field)[0:5] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, 5000, partition_num=1, + dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] # 2. create index and load params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "L2"} @@ -8005,13 +7982,10 @@ def test_range_search_binary_jaccard_flat_index(self, nq, _async, index, is_flus # 1. initialize with binary data dim = 48 auto_id = False - collection_w, _, binary_raw_vector, insert_ids, time_stamp = self.init_collection_general(prefix, True, 2, - is_binary=True, - auto_id=auto_id, - dim=dim, - is_index=False, - is_flush=is_flush)[ - 0:5] + collection_w, _, binary_raw_vector, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, 2, is_binary=True, + auto_id=auto_id, dim=dim, is_index=False, + is_flush=is_flush)[0:5] # 2. create index default_index = {"index_type": index, "params": { "nlist": 128}, "metric_type": "JACCARD"} @@ -8047,10 +8021,9 @@ def test_range_search_binary_jaccard_invalid_params(self, index): expected: return empty """ # 1. initialize with binary data - collection_w, _, binary_raw_vector, insert_ids, time_stamp = self.init_collection_general(prefix, True, 2, - is_binary=True, - dim=default_dim, - is_index=False,)[0:5] + collection_w, _, binary_raw_vector, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, 2, is_binary=True, + dim=default_dim, is_index=False, )[0:5] # 2. create index default_index = {"index_type": index, "params": { "nlist": 128}, "metric_type": "JACCARD"} @@ -8089,12 +8062,9 @@ def test_range_search_binary_hamming_flat_index(self, nq, _async, index, is_flus # 1. initialize with binary data dim = 80 auto_id = True - collection_w, _, binary_raw_vector, insert_ids = self.init_collection_general(prefix, True, 2, - is_binary=True, - auto_id=auto_id, - dim=dim, - is_index=False, - is_flush=is_flush)[0:4] + collection_w, _, binary_raw_vector, insert_ids = \ + self.init_collection_general(prefix, True, 2, is_binary=True, auto_id=auto_id, + dim=dim, is_index=False, is_flush=is_flush)[0:4] # 2. create index default_index = {"index_type": index, "params": { "nlist": 128}, "metric_type": "HAMMING"} @@ -8130,10 +8100,9 @@ def test_range_search_binary_hamming_invalid_params(self, index): expected: return empty """ # 1. initialize with binary data - collection_w, _, binary_raw_vector, insert_ids, time_stamp = self.init_collection_general(prefix, True, 2, - is_binary=True, - dim=default_dim, - is_index=False,)[0:5] + collection_w, _, binary_raw_vector, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, 2, is_binary=True, + dim=default_dim, is_index=False, )[0:5] # 2. create index default_index = {"index_type": index, "params": { "nlist": 128}, "metric_type": "HAMMING"} @@ -8222,10 +8191,9 @@ def test_range_search_binary_tanimoto_invalid_params(self, index): expected: return empty """ # 1. 
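# The JACCARD range cases above operate on packed binary vectors; a numpy
# sketch of the underlying distance (one minus intersection over union),
# handy for sanity-checking radius/range_filter bounds in [0, 1]:
import numpy as np

def jaccard_distance(a_packed, b_packed):
    a = np.unpackbits(np.asarray(a_packed, dtype=np.uint8)).astype(bool)
    b = np.unpackbits(np.asarray(b_packed, dtype=np.uint8)).astype(bool)
    union = np.logical_or(a, b).sum()
    return 1.0 - np.logical_and(a, b).sum() / union if union else 0.0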
initialize with binary data - collection_w, _, binary_raw_vector, insert_ids, time_stamp = self.init_collection_general(prefix, True, 2, - is_binary=True, - dim=default_dim, - is_index=False,)[0:5] + collection_w, _, binary_raw_vector, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, 2, is_binary=True, + dim=default_dim, is_index=False, )[0:5] # 2. create index default_index = {"index_type": index, "params": { "nlist": 128}, "metric_type": "JACCARD"} @@ -8278,64 +8246,77 @@ def test_range_search_binary_without_flush(self, metrics): "limit": default_limit}) @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("expression", cf.gen_normal_expressions()) - def test_range_search_with_expression(self, expression, _async, enable_dynamic_field): + def test_range_search_with_expression(self, enable_dynamic_field): """ target: test range search with different expressions method: test range search with different expressions expected: searched successfully with correct limit(topK) """ # 1. initialize with data - nb = 1000 + nb = 2000 dim = 200 - collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, - nb, dim=dim, - is_index=False, - enable_dynamic_field=enable_dynamic_field)[0:4] - - # filter result with expression in collection - _vectors = _vectors[0] - expression = expression.replace("&&", "and").replace("||", "or") - filter_ids = [] - for i, _id in enumerate(insert_ids): - if enable_dynamic_field: - int64 = _vectors[i][ct.default_int64_field_name] - float = _vectors[i][ct.default_float_field_name] - else: - int64 = _vectors.int64[i] - float = _vectors.float[i] - if not expression or eval(expression): - filter_ids.append(_id) - + collection_w, _vectors, _, insert_ids = \ + self.init_collection_general(prefix, True, nb, dim=dim, + is_index=False, enable_dynamic_field=enable_dynamic_field)[0:4] # 2. create index - index_param = {"index_type": "FLAT", - "metric_type": "L2", "params": {"nlist": 100}} + index_param = {"index_type": "FLAT", "metric_type": "L2", "params": {}} collection_w.create_index("float_vector", index_param) collection_w.load() - # 3. search with expression - log.info( - "test_range_search_with_expression: searching with expression: %s" % expression) - vectors = [[random.random() for _ in range(dim)] - for _ in range(default_nq)] - range_search_params = {"metric_type": "L2", "params": {"radius": 1000, - "range_filter": 0}} - search_res, _ = collection_w.search(vectors[:default_nq], default_search_field, - range_search_params, nb, expression, - _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": min(nb, len(filter_ids)), - "_async": _async}) - if _async: - search_res.done() - search_res = search_res.result() - - filter_ids_set = set(filter_ids) - for hits in search_res: - ids = hits.ids - assert set(ids).issubset(filter_ids_set) + # filter result with expression in collection + _vectors = _vectors[0] + for _async in [False, True]: + for expressions in cf.gen_normal_expressions_and_templates(): + log.debug(f"test_range_search_with_expression: {expressions} with _async={_async}") + expr = expressions[0].replace("&&", "and").replace("||", "or") + filter_ids = [] + for i, _id in enumerate(insert_ids): + if enable_dynamic_field: + int64 = _vectors[i][ct.default_int64_field_name] + float = _vectors[i][ct.default_float_field_name] + else: + int64 = _vectors.int64[i] + float = _vectors.float[i] + if not expr or eval(expr): + filter_ids.append(_id) + + # 3. 
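# test_range_search_with_expression keeps a plain expression and its template
# form equivalent; an illustrative pair of my own, assuming the {name}
# placeholder convention consumed via expr_params:
expr = "int64 > 50 and float <= 100"
template = {"expr": "int64 > {v0} and float <= {v1}",
            "expr_params": {"v0": 50, "v1": 100}}
# searching with expr=template["expr"], expr_params=template["expr_params"]
# is expected to return the same hits as searching with expr=expr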
search with expression + vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] + range_search_params = {"metric_type": "L2", "params": {"radius": 1000, "range_filter": 0}} + search_res, _ = collection_w.search(vectors[:default_nq], default_search_field, + range_search_params, nb, + expr=expr, _async=_async, + check_task=CheckTasks.check_search_results, + check_items={"nq": default_nq, + "ids": insert_ids, + "limit": min(nb, len(filter_ids)), + "_async": _async}) + if _async: + search_res.done() + search_res = search_res.result() + filter_ids_set = set(filter_ids) + for hits in search_res: + ids = hits.ids + assert set(ids).issubset(filter_ids_set) + + # 4. search again with expression template + expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or") + expr_params = cf.get_expr_params_from_template(expressions[1]) + search_res, _ = collection_w.search(vectors[:default_nq], default_search_field, + range_search_params, nb, + expr=expr, expr_params=expr_params, _async=_async, + check_task=CheckTasks.check_search_results, + check_items={"nq": default_nq, + "ids": insert_ids, + "limit": min(nb, len(filter_ids)), + "_async": _async}) + if _async: + search_res.done() + search_res = search_res.result() + filter_ids_set = set(filter_ids) + for hits in search_res: + ids = hits.ids + assert set(ids).issubset(filter_ids_set) @pytest.mark.tags(CaseLabel.L2) def test_range_search_with_output_field(self, _async, enable_dynamic_field): @@ -8381,10 +8362,9 @@ def test_range_search_concurrent_multi_threads(self, nq, _async, null_data_perce dim = 66 auto_id = False nb = 4000 - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nb, - auto_id=auto_id, dim=dim, - nullable_fields={ct.default_float_field_name: - null_data_percent})[0:5] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, nb, auto_id=auto_id, dim=dim, + nullable_fields={ct.default_float_field_name: null_data_percent})[0:5] def search(collection_w): vectors = [[random.random() for _ in range(dim)] @@ -9089,29 +9069,29 @@ def test_load_partition_drop_partition_delete(self): expected: No exception """ # insert data - collection_w = self.init_collection_general(prefix, True, 200, partition_num=1, is_index=False)[0] - partition_w1, partition_w2 = collection_w.partitions + collection_w = self.init_collection_wrap(name=prefix) + p1_name = cf.gen_unique_str("par1") + partition_w1 = self.init_partition_wrap(collection_w, name=p1_name) + p2_name = cf.gen_unique_str("par2") + partition_w2 = self.init_partition_wrap(collection_w, name=p2_name) collection_w.create_index(default_search_field, default_index_params) # load && release partition_w2.load() partition_w2.release() partition_w2.drop() - # delete data - delete_ids = [i for i in range(50, 150)] - collection_w.delete(f"int64 in {delete_ids}") # search on collection, partition1, partition2 - collection_w.search(vectors[:1], field_name, default_search_params, 200, + collection_w.search(vectors[:1], field_name, default_search_params, 10, partition_names=[partition_w1.name, partition_w2.name], check_task=CheckTasks.err_res, - check_items={ct.err_code: 65535, ct.err_msg: 'not loaded'}) - collection_w.search(vectors[:1], field_name, default_search_params, 200, + check_items={ct.err_code: 999, ct.err_msg: f'partition name {partition_w2.name} not found'}) + collection_w.search(vectors[:1], field_name, default_search_params, 10, partition_names=[partition_w1.name], 
check_task=CheckTasks.err_res, - check_items={ct.err_code: 65535, ct.err_msg: 'not loaded'}) - collection_w.search(vectors[:1], field_name, default_search_params, 200, + check_items={ct.err_code: 999, ct.err_msg: 'failed to search: collection not loaded'}) + collection_w.search(vectors[:1], field_name, default_search_params, 10, partition_names=[partition_w2.name], check_task=CheckTasks.err_res, - check_items={ct.err_code: 65535, ct.err_msg: 'not found'}) + check_items={ct.err_code: 999, ct.err_msg: f'partition name {partition_w2.name} not found'}) @pytest.mark.tags(CaseLabel.L2) def test_compact_load_collection_release_partition(self): @@ -9936,6 +9916,7 @@ def null_data_percent(self, request): # The followings are invalid base cases ****************************************************************** """ + @pytest.mark.skip("Supported json like: 1, \"abc\", [1,2,3,4]") @pytest.mark.tags(CaseLabel.L1) def test_search_json_expression_object(self): @@ -9947,8 +9928,7 @@ def test_search_json_expression_object(self): # 1. initialize with data nq = 1 dim = 128 - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general( - prefix, True, dim=dim)[0:5] + collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, dim=dim)[0:5] # 2. search before insert time_stamp log.info("test_search_json_expression_object: searching collection %s" % collection_w.name) @@ -10172,7 +10152,7 @@ def test_search_expr_array_contains(self, expr_prefix): collection_w = self.init_collection_wrap(schema=schema) # 2. insert data - string_field_value = [[str(j) for j in range(i, i+3)] for i in range(ct.default_nb)] + string_field_value = [[str(j) for j in range(i, i + 3)] for i in range(ct.default_nb)] data = cf.gen_array_dataframe_data() data[ct.default_string_array_field_name] = string_field_value collection_w.insert(data) @@ -10289,13 +10269,16 @@ def test_search_expr_array_contains_invalid(self, expr_prefix): # 3. 
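# The partition lifecycle driven above (load, release, drop, then search by
# name) can be reproduced with the plain pymilvus ORM; a minimal sketch:
from pymilvus import Collection, Partition

def drop_partition_then_search(collection: Collection, name: str):
    p = Partition(collection, name)
    p.load()
    p.release()   # a loaded partition must be released before dropping
    p.drop()
    # a search with partition_names=[name] now fails with
    # "partition name <name> not found"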
search collection_w.load() expression = f"{expr_prefix}({ct.default_string_array_field_name}, '1000')" + error = {ct.err_code: 1100, + ct.err_msg: f"cannot parse expression: {expression}, " + f"error: ContainsAll operation element must be an array"} + if expr_prefix in ["array_contains_any", "ARRAY_CONTAINS_ANY"]: + error = {ct.err_code: 1100, + ct.err_msg: f"cannot parse expression: {expression}, " + f"error: ContainsAny operation element must be an array"} collection_w.search(vectors[:default_nq], default_search_field, {}, limit=ct.default_nb, expr=expression, - check_task=CheckTasks.err_res, - check_items={ct.err_code: 1100, - ct.err_msg: "failed to create query plan: cannot parse " - "expression: %s, error: contains_any operation " - "element must be an array" % expression}) + check_task=CheckTasks.err_res, check_items=error) class TestSearchIterator(TestcaseBase): @@ -10514,7 +10497,7 @@ def test_search_iterator_invalid_nq(self): collection_w.search_iterator(vectors[:2], field_name, search_params, batch_size, check_task=CheckTasks.err_res, check_items={"err_code": 1, - "err_msg": "Not support multiple vector iterator at present"}) + "err_msg": "Not support search iteration over multiple vectors at present"}) class TestSearchGroupBy(TestcaseBase): @@ -10598,7 +10581,7 @@ def test_search_group_by_unsupported_field(self, grpby_unsupported_field): """ metric = "IP" collection_w = self.init_collection_general(prefix, insert_data=True, is_index=False, - is_all_data_type=True, with_json=True,)[0] + is_all_data_type=True, with_json=True, )[0] _index = {"index_type": "HNSW", "metric_type": metric, "params": {"M": 16, "efConstruction": 128}} collection_w.create_index(ct.default_float_vec_field_name, index_params=_index) collection_w.load() @@ -10628,7 +10611,7 @@ def test_search_group_by_unsupported_index(self, index): verify: the error code and msg """ if index in ["HNSW", "IVF_FLAT", "FLAT", "IVF_SQ8", "DISKANN"]: - pass # Only HNSW and IVF_FLAT are supported + pass # Only HNSW and IVF_FLAT are supported else: metric = "L2" collection_w = self.init_collection_general(prefix, insert_data=True, is_index=False, @@ -10845,6 +10828,7 @@ def vector_data_type(self, request): # The following are valid base cases for hybrid_search ****************************************************************** """ + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("offset", [0, 5]) @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) @@ -11089,12 +11073,12 @@ def test_hybrid_search_as_search(self, nq, primary_field, is_flush): "ids": insert_ids, "limit": default_limit})[0] search_res = collection_w.search(vectors[:nq], search_field, - default_search_params, default_limit, - default_search_exp, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": default_limit})[0] + default_search_params, default_limit, + default_search_exp, + check_task=CheckTasks.check_search_results, + check_items={"nq": nq, + "ids": insert_ids, + "limit": default_limit})[0] # 4. the effect of hybrid search to one field should equal to search log.info("The distance list is:\n") for i in range(nq): @@ -11781,7 +11765,7 @@ def test_hybrid_search_RRFRanker_default_parameter(self, primary_field): """ # 1. 
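# search_iterator (validated above to accept exactly one query vector) pages
# through results without an explicit offset; a minimal usage sketch:
import random

def iterate_all(collection, anns_field="float_vector", dim=128, batch_size=100):
    it = collection.search_iterator(
        data=[[random.random() for _ in range(dim)]],  # nq must be 1
        anns_field=anns_field,
        param={"metric_type": "L2", "params": {"nprobe": 16}},
        batch_size=batch_size,
    )
    ids = []
    while True:
        page = it.next()
        if len(page) == 0:
            it.close()
            return ids
        ids.extend(hit.id for hit in page)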
initialize collection with data collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, dim=default_dim, primary_field=primary_field, + self.init_collection_general(prefix, True, dim=default_dim, primary_field=primary_field, multiple_dim_array=[default_dim, default_dim])[0:5] # 2. extract vector field name vector_name_list = cf.extract_vector_field_name_list(collection_w) @@ -11810,25 +11794,25 @@ def test_hybrid_search_RRFRanker_default_parameter(self, primary_field): "limit": default_limit})[0] ids = search_res[0].ids for j in range(len(ids)): - search_res_dict[ids[j]] = 1/(j + 60 +1) + search_res_dict[ids[j]] = 1 / (j + 60 + 1) search_res_dict_array.append(search_res_dict) # 4. calculate hybrid search base line for RRFRanker ids_answer, score_answer = cf.get_hybrid_search_base_results_rrf(search_res_dict_array) # 5. hybrid search hybrid_search_0 = collection_w.hybrid_search(req_list, RRFRanker(), default_limit, - check_task=CheckTasks.check_search_results, - check_items={"nq": 1, - "ids": insert_ids, - "limit": default_limit})[0] + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] # 6. compare results through the re-calculated distances for i in range(len(score_answer[:default_limit])): assert score_answer[i] - hybrid_search_0[0].distances[i] < hybrid_search_epsilon # 7. run hybrid search with the same parameters twice, and compare the results hybrid_search_1 = collection_w.hybrid_search(req_list, RRFRanker(), default_limit, - check_task=CheckTasks.check_search_results, - check_items={"nq": 1, - "ids": insert_ids, - "limit": default_limit})[0] + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] assert hybrid_search_0[0].ids == hybrid_search_1[0].ids assert hybrid_search_0[0].distances == hybrid_search_1[0].distances @@ -11878,7 +11862,7 @@ def test_hybrid_search_RRFRanker_different_k(self, is_flush, k, offset): "limit": default_limit})[0] ids = search_res[0].ids for j in range(len(ids)): - search_res_dict[ids[j]] = 1/(j + k +1) + search_res_dict[ids[j]] = 1 / (j + k + 1) search_res_dict_array.append(search_res_dict) # 4. calculate hybrid search baseline for RRFRanker ids_answer, score_answer = cf.get_hybrid_search_base_results_rrf(search_res_dict_array) @@ -11946,12 +11930,12 @@ def test_hybrid_search_offset_inside_outside_params(self, primary_field, offset, "expr": "int64 > 0"} req = AnnSearchRequest(**search_param) req_list.append(req) - hybrid_res = collection_w.hybrid_search(req_list, rerank, default_limit-offset, + hybrid_res = collection_w.hybrid_search(req_list, rerank, default_limit - offset, offset=offset, check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit-offset})[0] + "limit": default_limit - offset})[0] assert hybrid_res_inside[0].distances[offset:] == hybrid_res[0].distances @@ -11987,7 +11971,7 @@ def test_hybrid_search_RRFRanker_k_out_of_range(self, k): """ # 1. initialize collection with data collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, dim=default_dim, + self.init_collection_general(prefix, True, dim=default_dim, multiple_dim_array=[default_dim, default_dim])[0:5] # 2. 
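# The RRF baseline above scores each hit as 1/(rank + k + 1) per field (k
# defaults to 60) and sums across fields; a compact standalone sketch:
def rrf_fuse(per_field_ids, k=60, limit=10):
    scores = {}
    for ids in per_field_ids:
        for rank, _id in enumerate(ids):
            scores[_id] = scores.get(_id, 0.0) + 1.0 / (rank + k + 1)
    top = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:limit]
    return [i for i, _ in top], [s for _, s in top]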
extract vector field name vector_name_list = cf.extract_vector_field_name_list(collection_w) @@ -12016,7 +12000,7 @@ def test_hybrid_search_RRFRanker_k_out_of_range(self, k): "limit": default_limit})[0] ids = search_res[0].ids for j in range(len(ids)): - search_res_dict[ids[j]] = 1/(j + k +1) + search_res_dict[ids[j]] = 1 / (j + k + 1) search_res_dict_array.append(search_res_dict) # 4. calculate hybrid search base line for RRFRanker ids_answer, score_answer = cf.get_hybrid_search_base_results_rrf(search_res_dict_array) @@ -12069,7 +12053,7 @@ def test_hybrid_search_different_limit_round_decimal(self, primary_field, limit) # search to get the base line of hybrid_search search_res = collection_w.search(vectors[:1], vector_name_list[i], default_search_params, limit, - default_search_exp, round_decimal= 5, + default_search_exp, round_decimal=5, check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, @@ -12168,7 +12152,7 @@ def test_hybrid_search_limit_out_of_range_min(self): @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) def test_hybrid_search_with_output_fields(self, nq, dim, auto_id, is_flush, enable_dynamic_field, - primary_field, vector_data_type): + primary_field, vector_data_type): """ target: test hybrid search normal case method: create connection, collection, insert and search @@ -12215,12 +12199,12 @@ def test_hybrid_search_with_output_fields(self, nq, dim, auto_id, is_flush, enab vectors_search = vectors[k] # 5. search to get the base line of hybrid_search search_res = collection_w.search([vectors_search], vector_name_list[i], - single_search_param, default_limit, - default_search_exp, - check_task=CheckTasks.check_search_results, - check_items={"nq": 1, - "ids": insert_ids, - "limit": default_limit})[0] + single_search_param, default_limit, + default_search_exp, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] ids = search_res[0].ids distance_array = search_res[0].distances for j in range(len(ids)): @@ -12249,7 +12233,7 @@ def test_hybrid_search_with_output_fields(self, nq, dim, auto_id, is_flush, enab @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) def test_hybrid_search_with_output_fields_all_fields(self, nq, dim, auto_id, is_flush, enable_dynamic_field, - primary_field, vector_data_type): + primary_field, vector_data_type): """ target: test hybrid search normal case method: create connection, collection, insert and search @@ -12296,12 +12280,12 @@ def test_hybrid_search_with_output_fields_all_fields(self, nq, dim, auto_id, is_ vectors_search = vectors[k] # 5. 
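# The requests fed to hybrid_search above are built by unpacking plain dicts
# into AnnSearchRequest; a minimal direct construction (illustrative values):
import random
from pymilvus import AnnSearchRequest, RRFRanker

def rrf_hybrid(collection, fields, dim=128, k=60, limit=10):
    reqs = [AnnSearchRequest(data=[[random.random() for _ in range(dim)]],
                             anns_field=f,
                             param={"metric_type": "COSINE", "params": {"nprobe": 10}},
                             limit=limit,
                             expr="int64 > 0")
            for f in fields]
    return collection.hybrid_search(reqs, RRFRanker(k), limit)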
search to get the base line of hybrid_search search_res = collection_w.search([vectors_search], vector_name_list[i], - single_search_param, default_limit, - default_search_exp, - check_task=CheckTasks.check_search_results, - check_items={"nq": 1, - "ids": insert_ids, - "limit": default_limit})[0] + single_search_param, default_limit, + default_search_exp, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] ids = search_res[0].ids distance_array = search_res[0].distances for j in range(len(ids)): @@ -12332,7 +12316,7 @@ def test_hybrid_search_with_output_fields_all_fields(self, nq, dim, auto_id, is_ @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) def test_hybrid_search_with_output_fields_all_fields(self, nq, dim, auto_id, is_flush, enable_dynamic_field, - primary_field, vector_data_type): + primary_field, vector_data_type): """ target: test hybrid search normal case method: create connection, collection, insert and search @@ -12379,12 +12363,12 @@ def test_hybrid_search_with_output_fields_all_fields(self, nq, dim, auto_id, is_ vectors_search = vectors[k] # 5. search to get the base line of hybrid_search search_res = collection_w.search([vectors_search], vector_name_list[i], - single_search_param, default_limit, - default_search_exp, - check_task=CheckTasks.check_search_results, - check_items={"nq": 1, - "ids": insert_ids, - "limit": default_limit})[0] + single_search_param, default_limit, + default_search_exp, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] ids = search_res[0].ids distance_array = search_res[0].distances for j in range(len(ids)): @@ -12399,7 +12383,7 @@ def test_hybrid_search_with_output_fields_all_fields(self, nq, dim, auto_id, is_ score_answer_nq.append(score_answer) # 7. hybrid search hybrid_res = collection_w.hybrid_search(req_list, WeightedRanker(*weights), default_limit, - output_fields= ["*"], + output_fields=["*"], check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, @@ -12410,7 +12394,8 @@ def test_hybrid_search_with_output_fields_all_fields(self, nq, dim, auto_id, is_ assert score_answer_nq[k][i] - hybrid_res[k].distances[i] < hybrid_search_epsilon @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("output_fields", [[default_search_field], [default_search_field, default_int64_field_name]]) + @pytest.mark.parametrize("output_fields", + [[default_search_field], [default_search_field, default_int64_field_name]]) @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) def test_hybrid_search_with_output_fields_sync_async(self, nq, primary_field, output_fields, _async): """ @@ -12545,7 +12530,7 @@ def test_hybrid_search_is_partition_key(self, nq, primary_field, limit, vector_d collection_w, _, _, insert_ids, time_stamp = \ self.init_collection_general(prefix, True, primary_field=primary_field, multiple_dim_array=[default_dim, default_dim], - vector_data_type = vector_data_type, + vector_data_type=vector_data_type, is_partition_key=ct.default_float_field_name)[0:5] # 2. extract vector field name vector_name_list = cf.extract_vector_field_name_list(collection_w) @@ -12617,13 +12602,14 @@ def test_hybrid_search_result_L2_order(self, nq): """ # 1. 
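# The weighted baseline above recombines each field's result list; a simplified
# standalone sketch that assumes similarity scores where larger means closer
# (COSINE/IP); L2 distances would need normalizing before weighting:
def weighted_fuse(per_field_hits, weights, limit=10):
    scores = {}
    for (ids, dists), w in zip(per_field_hits, weights):
        for _id, d in zip(ids, dists):
            scores[_id] = scores.get(_id, 0.0) + w * d
    top = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:limit]
    return [i for i, _ in top], [s for _, s in top]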
initialize collection with data collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, is_index=False, multiple_dim_array=[default_dim, default_dim])[0:5] + self.init_collection_general(prefix, True, is_index=False, + multiple_dim_array=[default_dim, default_dim])[0:5] # 2. create index vector_name_list = cf.extract_vector_field_name_list(collection_w) vector_name_list.append(ct.default_float_vec_field_name) - for i in range(len(vector_name_list)) : - default_index = { "index_type": "IVF_FLAT", "metric_type": "L2", "params": {"nlist": 128},} + for i in range(len(vector_name_list)): + default_index = {"index_type": "IVF_FLAT", "metric_type": "L2", "params": {"nlist": 128}, } collection_w.create_index(vector_name_list[i], default_index) collection_w.load() @@ -12674,7 +12660,7 @@ def test_hybrid_search_result_order(self, nq): req_list.append(req) # 4. hybrid search res = collection_w.hybrid_search(req_list, WeightedRanker(*weights), 10)[0] - is_sorted_descend = lambda lst: all(lst[i] >= lst[i+1] for i in range(len(lst)-1)) + is_sorted_descend = lambda lst: all(lst[i] >= lst[i + 1] for i in range(len(lst) - 1)) for i in range(nq): assert is_sorted_descend(res[i].distances) @@ -12687,9 +12673,9 @@ def test_hybrid_search_sparse_normal(self): """ nb, auto_id, dim, enable_dynamic_field = 20000, False, 768, False # 1. init collection - collection_w, insert_vectors, _, insert_ids = self.init_collection_general(prefix, True, nb=nb, - multiple_dim_array=[dim, dim*2], with_json=False, - vector_data_type="SPARSE_FLOAT_VECTOR")[0:4] + collection_w, insert_vectors, _, insert_ids = \ + self.init_collection_general(prefix, True, nb=nb, multiple_dim_array=[dim, dim * 2], + with_json=False, vector_data_type="SPARSE_FLOAT_VECTOR")[0:4] # 2. extract vector field name vector_name_list = cf.extract_vector_field_name_list(collection_w) # 3. prepare search params @@ -12699,7 +12685,7 @@ def test_hybrid_search_sparse_normal(self): for i in range(len(vector_name_list)): # vector = cf.gen_sparse_vectors(1, dim) - vector = insert_vectors[0][i+3][-1:] + vector = insert_vectors[0][i + 3][-1:] search_res_dict = {} search_param = { "data": vector, @@ -12716,7 +12702,7 @@ def test_hybrid_search_sparse_normal(self): )[0] ids = search_res[0].ids for j in range(len(ids)): - search_res_dict[ids[j]] = 1/(j + k +1) + search_res_dict[ids[j]] = 1 / (j + k + 1) search_res_dict_array.append(search_res_dict) # 4. 
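# gen_sparse_vectors (referenced in the commented-out line above) yields rows
# in the {dimension_index: weight} form; a plausible minimal sketch:
import random

def gen_sparse_vectors(nq, dim):
    rows = []
    for _ in range(nq):
        idx = random.sample(range(dim), k=random.randint(10, 20))
        rows.append({i: random.random() for i in idx})
    return rows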
calculate hybrid search base line for RRFRanker ids_answer, score_answer = cf.get_hybrid_search_base_results_rrf(search_res_dict_array) @@ -12824,8 +12810,8 @@ def test_sparse_index_enable_mmap_search(self, index): assert pro["mmap.enabled"] == 'True' collection_w.alter_index(index, {'mmap.enabled': True}) assert collection_w.index()[0].params["mmap.enabled"] == 'True' - data2 = cf.gen_default_list_sparse_data(nb=2000, start=first_nb) # id shall be continuous - all_data = [] # combine 2 insert datas for next checking + data2 = cf.gen_default_list_sparse_data(nb=2000, start=first_nb) # id shall be continuous + all_data = [] # combine 2 insert datas for next checking for i in range(len(data2)): all_data.append(data[i] + data2[i]) collection_w.insert(data2) @@ -12897,7 +12883,7 @@ def test_sparse_vector_search_output_field(self, index): check_items={"nq": default_nq, "limit": default_limit, "output_fields": ["float", "sparse_vector"] - }) + }) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("index", ct.all_index_types[9:11]) @@ -12996,7 +12982,8 @@ def null_data_percent(self, request): """ @pytest.mark.tags(CaseLabel.L0) - def test_search_normal_none_data(self, nq, dim, auto_id, is_flush, enable_dynamic_field, vector_data_type, null_data_percent): + def test_search_normal_none_data(self, nq, dim, auto_id, is_flush, enable_dynamic_field, vector_data_type, + null_data_percent): """ target: test search normal case with none data inserted method: create connection, collection with nullable fields, insert data including none, and search @@ -13040,9 +13027,10 @@ def test_search_after_none_data_all_field_datatype(self, varchar_scalar_index, n ct.default_float_field_name: null_data_percent, ct.default_double_field_name: null_data_percent, ct.default_string_field_name: null_data_percent} - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, 5000, partition_num=1, - is_all_data_type=True, dim=default_dim, - is_index=False, nullable_fields=nullable_fields)[0:4] + collection_w, _, _, insert_ids = \ + self.init_collection_general(prefix, True, 5000, partition_num=1, + is_all_data_type=True, dim=default_dim, + is_index=False, nullable_fields=nullable_fields)[0:4] # 2. create index on vector field and load index = "HNSW" params = cf.get_index_params_params(index) @@ -13120,7 +13108,8 @@ def test_search_default_value_without_insert(self, enable_dynamic_field): collection_w = self.init_collection_general(prefix, False, dim=default_dim, enable_dynamic_field=enable_dynamic_field, nullable_fields={ct.default_float_field_name: 0}, - default_value_fields={ct.default_float_field_name: np.float32(10.0)})[0] + default_value_fields={ + ct.default_float_field_name: np.float32(10.0)})[0] # 2. generate search data vectors = cf.gen_vectors_based_on_vector_type(default_nq, default_dim, "FLOAT_VECTOR") # 3. search after insert @@ -13141,14 +13130,15 @@ def test_search_after_default_data_all_field_datatype(self, varchar_scalar_index """ # 1. 
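# test_sparse_index_enable_mmap_search toggles mmap on a built index; the same
# flow with the plain ORM (index name illustrative; mmap can only be altered
# while the collection is released, and the stored value reads back as 'True'):
def enable_index_mmap(collection, index_name):
    collection.release()
    collection.alter_index(index_name, {"mmap.enabled": True})
    assert collection.index().params["mmap.enabled"] == "True"
    collection.load()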
initialize with data default_value_fields = {ct.default_int32_field_name: np.int32(1), - ct.default_int16_field_name: np.int32(2), - ct.default_int8_field_name: np.int32(3), - ct.default_bool_field_name: True, - ct.default_float_field_name: np.float32(10.0), - ct.default_double_field_name: 10.0, - ct.default_string_field_name: "1"} + ct.default_int16_field_name: np.int32(2), + ct.default_int8_field_name: np.int32(3), + ct.default_bool_field_name: True, + ct.default_float_field_name: np.float32(10.0), + ct.default_double_field_name: 10.0, + ct.default_string_field_name: "1"} collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, 5000, partition_num=1, - is_all_data_type=True, dim=default_dim, is_index=False, + is_all_data_type=True, dim=default_dim, + is_index=False, default_value_fields=default_value_fields)[0:4] # 2. create index on vector field and load index = "HNSW" @@ -13191,7 +13181,8 @@ def test_search_after_default_data_all_field_datatype(self, varchar_scalar_index "output_fields": output_fields}) @pytest.mark.tags(CaseLabel.L1) - def test_search_both_default_value_non_data(self, nq, dim, auto_id, is_flush, enable_dynamic_field, vector_data_type): + def test_search_both_default_value_non_data(self, nq, dim, auto_id, is_flush, enable_dynamic_field, + vector_data_type): """ target: test search normal case with default value set method: create connection, collection with default value set, insert and search @@ -13231,13 +13222,13 @@ def test_search_collection_with_non_default_data_after_release_load(self, nq, _a expected: search successfully """ # 1. initialize without data - nb= 2000 + nb = 2000 dim = 64 auto_id = True - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nb, - 1, auto_id=auto_id, dim=dim, - nullable_fields={ct.default_string_field_name: null_data_percent}, - default_value_fields={ct.default_float_field_name: np.float32(10.0)})[0:5] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, nb, 1, auto_id=auto_id, dim=dim, + nullable_fields={ct.default_string_field_name: null_data_percent}, + default_value_fields={ct.default_float_field_name: np.float32(10.0)})[0:5] # 2. release collection collection_w.release() # 3. Search the pre-released collection after load @@ -13246,7 +13237,7 @@ def test_search_collection_with_non_default_data_after_release_load(self, nq, _a vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] collection_w.search(vectors[:nq], default_search_field, default_search_params, default_limit, default_search_exp, _async=_async, - output_fields = [ct.default_float_field_name, ct.default_string_field_name], + output_fields=[ct.default_float_field_name, ct.default_string_field_name], check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, @@ -13257,7 +13248,8 @@ def test_search_collection_with_non_default_data_after_release_load(self, nq, _a @pytest.mark.tags(CaseLabel.L1) @pytest.mark.tags(CaseLabel.GPU) - def test_search_after_different_index_with_params_none_default_data(self, varchar_scalar_index, numeric_scalar_index, + def test_search_after_different_index_with_params_none_default_data(self, varchar_scalar_index, + numeric_scalar_index, null_data_percent, _async): """ target: test search after different index @@ -13265,10 +13257,11 @@ def test_search_after_different_index_with_params_none_default_data(self, varcha expected: search successfully with limit(topK) """ # 1. 
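# The nullable_fields / default_value_fields wrappers above map onto schema
# flags; a minimal sketch, assuming a pymilvus version that exposes nullable
# and default_value on FieldSchema:
import numpy as np
from pymilvus import FieldSchema, DataType

fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
    FieldSchema(name="comment", dtype=DataType.VARCHAR, max_length=256, nullable=True),
    FieldSchema(name="score", dtype=DataType.FLOAT, default_value=np.float32(10.0)),
    FieldSchema(name="emb", dtype=DataType.FLOAT_VECTOR, dim=64),
]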
initialize with data - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, 5000, partition_num=1, - is_all_data_type=True, dim=default_dim, is_index=False, - nullable_fields={ct.default_string_field_name: null_data_percent}, - default_value_fields={ct.default_float_field_name: np.float32(10.0)})[0:4] + collection_w, _, _, insert_ids = \ + self.init_collection_general(prefix, True, 5000, partition_num=1, is_all_data_type=True, + dim=default_dim, is_index=False, + nullable_fields={ct.default_string_field_name: null_data_percent}, + default_value_fields={ct.default_float_field_name: np.float32(10.0)})[0:4] # 2. create index on vector field and load index = "HNSW" params = cf.get_index_params_params(index) @@ -13310,8 +13303,9 @@ def test_search_iterator_with_none_data(self, batch_size, null_data_percent): """ # 1. initialize with data dim = 64 - collection_w = self.init_collection_general(prefix, True, dim=dim, is_index=False, - nullable_fields={ct.default_string_field_name: null_data_percent})[0] + collection_w = \ + self.init_collection_general(prefix, True, dim=dim, is_index=False, + nullable_fields={ct.default_string_field_name: null_data_percent})[0] collection_w.create_index(field_name, {"metric_type": "L2"}) collection_w.load() # 2. search iterator @@ -13428,7 +13422,6 @@ class TestSearchWithTextMatchFilter(TestcaseBase): The following cases are used to test query text match ****************************************************************** """ - @pytest.mark.tags(CaseLabel.L0) @pytest.mark.parametrize("enable_partition_key", [True, False]) @pytest.mark.parametrize("enable_inverted_index", [True, False]) @@ -13515,9 +13508,9 @@ def test_search_with_text_match_filter_normal_en( batch_size = 5000 for i in range(0, len(df), batch_size): collection_w.insert( - data[i : i + batch_size] + data[i: i + batch_size] if i + batch_size < len(df) - else data[i : len(df)] + else data[i: len(df)] ) collection_w.flush() collection_w.create_index( @@ -13544,7 +13537,7 @@ def test_search_with_text_match_filter_normal_en( if ann_field == "float32_emb": search_data = [[random.random() for _ in range(dim)]] elif ann_field == "sparse_emb": - search_data = cf.gen_sparse_vectors(1,dim=10000) + search_data = cf.gen_sparse_vectors(1, dim=10000) else: search_data = [[random.random() for _ in range(dim)]] for field in text_fields: @@ -13748,4 +13741,5 @@ def test_search_with_text_match_filter_normal_zh( assert len(res) > 0 for r in res: r = r.to_dict() - assert any([token in r["entity"][field] for token in top_10_tokens]) \ No newline at end of file + assert any([token in r["entity"][field] for token in top_10_tokens]) + diff --git a/tests/python_client/testcases/test_utility.py b/tests/python_client/testcases/test_utility.py index 01d34193b6f6f..883a2ab4270c0 100644 --- a/tests/python_client/testcases/test_utility.py +++ b/tests/python_client/testcases/test_utility.py @@ -338,7 +338,7 @@ def test_wait_for_loading_partition_not_existed(self): self.utility_wrap.wait_for_loading_complete( collection_w.name, partition_names=[ct.default_tag], check_task=CheckTasks.err_res, - check_items={ct.err_code: 200, ct.err_msg: f'partition={ct.default_tag}: partition not found'}) + check_items={ct.err_code: 200, ct.err_msg: f'partition not found[partition={ct.default_tag}]'}) @pytest.mark.tags(CaseLabel.L2) def test_drop_collection_not_existed(self): @@ -491,10 +491,11 @@ def test_rename_collection_new_invalid_value(self, get_invalid_value_collection_ collection_w, vectors, _, insert_ids, _ = 
self.init_collection_general(prefix) old_collection_name = collection_w.name new_collection_name = get_invalid_value_collection_name - error = {"err_code": 1100, "err_msg": "Invalid collection name: %s. the first character of a collection name mu" - "st be an underscore or letter: invalid parameter" % new_collection_name} + error = {"err_code": 1100, "err_msg": "Invalid collection name"} if new_collection_name in [None, ""]: error = {"err_code": 999, "err_msg": f"`collection_name` value {new_collection_name} is illegal"} + if new_collection_name == " ": + error = {"err_code": 999, "err_msg": "collection name should not be empty"} self.utility_wrap.rename_collection(old_collection_name, new_collection_name, check_task=CheckTasks.err_res, check_items=error) @@ -547,8 +548,7 @@ def test_rename_collection_existed_collection_alias(self): self.utility_wrap.rename_collection(old_collection_name, alias, check_task=CheckTasks.err_res, check_items={"err_code": 65535, - "err_msg": "duplicated new collection name default:{} with " - "other collection name or alias".format(alias)}) + "err_msg": f"cannot rename collection to an existing alias: {alias}"}) @pytest.mark.tags(CaseLabel.L1) def test_rename_collection_using_alias(self): @@ -747,7 +747,7 @@ def test_index_process_collection_insert_no_index(self): cw = self.init_collection_wrap(name=c_name) data = cf.gen_default_list_data(nb) cw.insert(data=data) - error = {ct.err_code: 700, ct.err_msg: f"{c_name}: index not found"} + error = {ct.err_code: 999, ct.err_msg: f"index not found[collection={c_name}]"} self.utility_wrap.index_building_progress(c_name, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1)
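# The rename failure modes above come from utility.rename_collection; a minimal
# sketch of the call and the alias collision the test expects:
from pymilvus import utility

utility.rename_collection("old_name", "new_name")
# renaming onto an existing alias is rejected with
# "cannot rename collection to an existing alias: <alias>"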