Skip to content

Commit

Permalink
test: refine log
Browse files: browse the repository at this point in the history.
Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
  • Loading branch information
zhuwenxing committed Sep 24, 2024
1 parent 122290c commit 4c370c5
Showing 1 changed file with 29 additions and 20 deletions.
49 changes: 29 additions & 20 deletions tests/python_client/testcases/test_query.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4201,7 +4201,7 @@ def test_query_text_match_normal(
FieldSchema(name="emb", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
data_size = 5000
data_size = 3000
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
Expand Down Expand Up @@ -4232,7 +4232,6 @@ def test_query_text_match_normal(
if i + batch_size < len(df)
else data[i : len(df)]
)
collection_w.flush()
collection_w.create_index(
"emb",
{"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}},
Expand All @@ -4252,7 +4251,7 @@ def test_query_text_match_normal(
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
assert len(res) > 0
log.info(f"res len {len(res)} res {res}")
log.info(f"res len {len(res)}")
for r in res:
assert token in r[field]

Expand All @@ -4275,7 +4274,7 @@ def test_query_text_match_normal(
expr = f"TextMatch({field}, '{string_of_top_10_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)} res {res}")
log.info(f"res len {len(res)}")
for r in res:
assert any([token in r[field] for token in top_10_tokens])

Expand Down Expand Up @@ -4386,7 +4385,7 @@ def test_query_text_match_custom_analyzer(self):
expr = f"TextMatch({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)} res {res}")
log.info(f"res len {len(res)}")
for r in res:
assert token in r[field]

Expand All @@ -4400,7 +4399,7 @@ def test_query_text_match_custom_analyzer(self):
expr = f"TextMatch({field}, '{string_of_top_10_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)} res {res}")
log.info(f"res len {len(res)}")
for r in res:
assert any([token in r[field] for token in top_10_tokens])

Expand Down Expand Up @@ -4609,7 +4608,7 @@ def test_query_text_match_with_combined_expression_for_multi_field(self):
log.info(f"expr: {text_match_expr}")
res, _ = collection_w.query(expr=text_match_expr, output_fields=text_fields)
onetime_res = res
log.info(f"res len {len(res)} res {res}")
log.info(f"res len {len(res)}")
step_by_step_results = []
for expr in query:
if isinstance(expr, dict):
Expand All @@ -4626,7 +4625,7 @@ def test_query_text_match_with_combined_expression_for_multi_field(self):
log.info(
f"text match res {len(text_match_df)}\n{text_match_df[key]}"
)
log.info(f"tmp expr {tmp_expr} {len(res)}, {res}")
log.info(f"tmp expr {tmp_expr} {len(res)}")
tmp_idx = [r["id"] for r in res]
step_by_step_results.append(tmp_idx)
pandas_filter_res = cf.generate_pandas_text_match_result(
Expand All @@ -4645,7 +4644,6 @@ def test_query_text_match_with_combined_expression_for_multi_field(self):
)
if isinstance(expr, str):
step_by_step_results.append(expr)
log.info(f"step by step results {step_by_step_results}")
final_res = cf.evaluate_expression(step_by_step_results)
log.info(f"one time res {len(onetime_res)}, final res {len(final_res)}")
if len(onetime_res) != len(final_res):
Expand Down Expand Up @@ -4774,6 +4772,8 @@ def test_query_text_match_with_multi_lang(self):
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
assert len(res) > 0
for r in res:
assert token in r[field]

# query single field for multi-word
for field in text_fields:
Expand All @@ -4786,6 +4786,9 @@ def test_query_text_match_with_multi_lang(self):
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
assert len(res) > 0
for r in res:
assert any([token in r[field] for token in multi_words])

@pytest.mark.tags(CaseLabel.L1)
def test_query_text_match_with_addition_inverted_index(self):
Expand Down Expand Up @@ -4847,29 +4850,28 @@ def test_query_text_match_with_addition_inverted_index(self):
for i in range(data_size):
d = {
"id": i,
"word": fake_en.word(),
"sentence": fake_en.sentence(),
"paragraph": fake_en.paragraph(),
"text": fake_en.text(),
"word": fake_en.word().lower(),
"sentence": fake_en.sentence().lower(),
"paragraph": fake_en.paragraph().lower(),
"text": fake_en.text().lower(),
"emb": cf.gen_vectors(1, dim)[0],
}
data.append(d)
log.info(f"data\n{data[:10]}")
batch_size = 5000
for i in range(0, data_size, batch_size):
collection_w.insert(
data[i : i + batch_size]
if i + batch_size < data_size
else data[i:data_size]
)
collection_w.flush()
collection_w.create_index(
"emb",
{"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}},
)
collection_w.create_index("word", {"index_type": "INVERTED"})
collection_w.load()
df = pd.DataFrame(data)
log.info(f"dataframe\n{df}")
text_fields = ["word", "sentence", "paragraph", "text"]
wf_map = {}
for field in text_fields:
Expand All @@ -4880,14 +4882,17 @@ def test_query_text_match_with_addition_inverted_index(self):
expr = f"TextMatch({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)} res {res}")
log.info(f"res len {len(res)}")
assert len(res) > 0
for r in res:
assert token in r[field]
if field == "word":
assert len(res) == wf_map[field].most_common()[-1][1]
expr = f"{field} == '{token}'"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)} res {res}")
log.info(f"res len {len(res)}")
assert len(res) == wf_map[field].most_common()[-1][1]

@pytest.mark.tags(CaseLabel.L1)
def test_query_text_match_with_some_empty_string(self):
Expand Down Expand Up @@ -4991,9 +4996,9 @@ def test_query_text_match_with_some_empty_string(self):
batch_size = 5000
for i in range(0, len(df), batch_size):
collection_w.insert(
data[i : i + batch_size]
data[i: i + batch_size]
if i + batch_size < len(df)
else data[i : len(df)]
else data[i: len(df)]
)
collection_w.flush()
collection_w.create_index(
Expand All @@ -5009,7 +5014,8 @@ def test_query_text_match_with_some_empty_string(self):
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
assert len(res) > 0

for r in res:
assert token in r[field]
# query single field for multi-word
for field in text_fields:
# match top 3 most common words
Expand All @@ -5021,6 +5027,9 @@ def test_query_text_match_with_some_empty_string(self):
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
assert len(res) > 0
for r in res:
assert any([token in r[field] for token in multi_words])


class TestQueryTextMatchNegative(TestcaseBase):
Expand Down

0 comments on commit 4c370c5

Please sign in to comment.