Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: add text match negative test case #38982

Open
wants to merge 1 commit into
base: 2.5
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions tests/python_client/testcases/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -6620,6 +6620,7 @@ def test_query_text_match_with_nullable(self):


class TestQueryTextMatchNegative(TestcaseBase):

@pytest.mark.tags(CaseLabel.L0)
def test_query_text_match_with_unsupported_tokenizer(self):
"""
Expand Down Expand Up @@ -6687,6 +6688,116 @@ def test_query_text_match_with_unsupported_tokenizer(self):
check_items=error,
)

@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("enable_partition_key", [True])
@pytest.mark.parametrize("enable_inverted_index", [True])
@pytest.mark.parametrize("tokenizer", ["standard"])
def test_query_text_match_when_enable_match_false(
self, tokenizer, enable_inverted_index, enable_partition_key
):
"""
target: test text match when field enable_match is false
method: 1. not enable text match and insert data with varchar
2. get the most common words and query with text match
3. verify the result
expected: text match failed and return error
"""
analyzer_params = {
"tokenizer": tokenizer,
}
dim = 128
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(
name="word",
dtype=DataType.VARCHAR,
max_length=65535,
enable_analyzer=True,
enable_match=False,
is_partition_key=enable_partition_key,
analyzer_params=analyzer_params,
),
FieldSchema(
name="sentence",
dtype=DataType.VARCHAR,
max_length=65535,
enable_analyzer=True,
enable_match=False,
analyzer_params=analyzer_params,
),
FieldSchema(
name="paragraph",
dtype=DataType.VARCHAR,
max_length=65535,
enable_analyzer=True,
enable_match=False,
analyzer_params=analyzer_params,
),
FieldSchema(
name="text",
dtype=DataType.VARCHAR,
max_length=65535,
enable_analyzer=True,
enable_match=False,
analyzer_params=analyzer_params,
),
FieldSchema(name="emb", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
data_size = 3000
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
fake = fake_en
if tokenizer == "jieba":
language = "zh"
fake = fake_zh
else:
language = "en"

data = [
{
"id": i,
"word": fake.word().lower(),
"sentence": fake.sentence().lower(),
"paragraph": fake.paragraph().lower(),
"text": fake.text().lower(),
"emb": [random.random() for _ in range(dim)],
}
for i in range(data_size)
]
df = pd.DataFrame(data)
log.info(f"dataframe\n{df}")
batch_size = 5000
for i in range(0, len(df), batch_size):
collection_w.insert(
data[i: i + batch_size]
if i + batch_size < len(df)
else data[i: len(df)]
)
# only if the collection is flushed, the inverted index ca be applied.
# growing segment may be not applied, although in strong consistency.
collection_w.flush()
collection_w.create_index(
"emb",
{"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}},
)
if enable_inverted_index:
collection_w.create_index("word", {"index_type": "INVERTED"})
collection_w.load()
# analyze the croup
text_fields = ["word", "sentence", "paragraph", "text"]
wf_map = {}
for field in text_fields:
wf_map[field] = cf.analyze_documents(df[field].tolist(), language=language)
# query single field for one token
for field in text_fields:
token = wf_map[field].most_common()[0][0]
expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field],
check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535, ct.err_msg: "query failed"})

class TestQueryFunction(TestcaseBase):
@pytest.mark.tags(CaseLabel.L1)
Expand Down