Skip to content

Commit

Permalink
[Cosmos] Fix async vector query tests (#37685)
Browse files (browse the repository at this point in the history)
* fix tests

* update messages

* Revert "update messages"

This reverts commit 30a6eac.

* revert flaky test updates

* small changes

* Update test_query_vector_similarity.py

* Update test_query_vector_similarity_async.py

* update tests, fix buffer logic
  • Loading branch information
simorenoh authored Oct 7, 2024
1 parent 28ad1af commit ea43b07
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,9 @@ async def _create_pipelined_execution_context(self, query_execution_info):

# throw exception here for vector search query without limit filter or limit > max_limit
if query_execution_info.get_non_streaming_order_by():
total_item_buffer = query_execution_info.get_top() or\
query_execution_info.get_limit() + query_execution_info.get_offset()
if total_item_buffer is None:
total_item_buffer = (query_execution_info.get_top() or 0) or \
((query_execution_info.get_limit() or 0) + (query_execution_info.get_offset() or 0))
if total_item_buffer == 0:
raise ValueError("Executing a vector search query without TOP or LIMIT can consume many" +
" RUs very fast and have long runtimes. Please ensure you are using one" +
" of the two filters with your vector search query.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,13 @@ def _create_pipelined_execution_context(self, query_execution_info):

# throw exception here for vector search query without limit filter or limit > max_limit
if query_execution_info.get_non_streaming_order_by():
total_item_number = query_execution_info.get_top() or query_execution_info.get_limit()
if total_item_number is None:
total_item_buffer = (query_execution_info.get_top() or 0) or \
((query_execution_info.get_limit() or 0) + (query_execution_info.get_offset() or 0))
if total_item_buffer == 0:
raise ValueError("Executing a vector search query without TOP or LIMIT can consume many" +
" RUs very fast and have long runtimes. Please ensure you are using one" +
" of the two filters with your vector search query.")
if total_item_number > os.environ.get('AZURE_COSMOS_MAX_ITEM_BUFFER_VECTOR_SEARCH', 50000):
if total_item_buffer > os.environ.get('AZURE_COSMOS_MAX_ITEM_BUFFER_VECTOR_SEARCH', 50000):
raise ValueError("Executing a vector search query with more items than the max is not allowed." +
"Please ensure you are using a limit smaller than the max, or change the max.")
execution_context_aggregator =\
Expand Down
6 changes: 3 additions & 3 deletions sdk/cosmos/azure-cosmos/test/test_query_vector_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,10 +210,10 @@ def test_vector_query_pagination(self):
max_item_count=3)
all_fetched_res = []
count = 0
for page in query_iterable.by_page():
fetched_res = list(page)
all_fetched_res.extend(fetched_res)
item_pages = query_iterable.by_page()
for items in item_pages:
count += 1
all_fetched_res.extend(list(items))
assert count == 3
assert len(all_fetched_res) == 8
verify_ordering(all_fetched_res, "cosine")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def verify_ordering(item_list, distance_function):
assert item_list[i]["SimilarityScore"] >= item_list[i + 1]["SimilarityScore"]


class TestVectorSimilarityQueryAsync(unittest.TestCase):
class TestVectorSimilarityQueryAsync(unittest.IsolatedAsyncioTestCase):
"""Test to check vector similarity queries behavior."""

created_db: DatabaseProxy = None
Expand Down
Expand Up @@ -182,7 +182,7 @@ async def test_ordering_distances_async(self):
# disk_ann_list = [item async for item in self.created_diskANN_dotproduct_container.query_items(query=vanilla_query)]
# verify_ordering(disk_ann_list, "dotproduct")

async def test_vector_query_pagination(self):
async def test_vector_query_pagination_async(self):
# load up previously calculated embedding for the given string
vector_string = vector_test_data.get_embedding_string("I am having a wonderful day.")

Expand All @@ -194,15 +194,15 @@ async def test_vector_query_pagination(self):
max_item_count=3)
all_fetched_res = []
count = 0
pages = query_iterable.by_page()
async for items in await pages.__anext__():
item_pages = query_iterable.by_page()
async for items in item_pages:
count += 1
all_fetched_res.extend(items)
assert count >= 3
all_fetched_res.extend([item async for item in items])
assert count == 3
assert len(all_fetched_res) == 8
verify_ordering(all_fetched_res, "cosine")

async def test_vector_query_large_data(self):
async def test_vector_query_large_data_async(self):
# test different limit queries on a larger data set
embedding_value = 0.0001
for i in range(2000):
Expand Down

0 comments on commit ea43b07

Please sign in to comment.