[Cosmos] Fix async vector query tests (#37685)

* fix tests
* update messages
* Revert "update messages"

  This reverts commit 30a6eac.
* revert flaky test updates
* small changes
* Update test_query_vector_similarity.py
* Update test_query_vector_similarity_async.py
* update tests, fix buffer logic
Azure · Oct 7, 2024 · ea43b07 · ea43b07
1 parent 28ad1af
commit ea43b07
Show file tree

Hide file tree

Showing 4 changed files with 17 additions and 16 deletions.
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/execution_dispatcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/execution_dispatcher.py
@@ -110,9 +110,9 @@ async def _create_pipelined_execution_context(self, query_execution_info):
 
         # throw exception here for vector search query without limit filter or limit > max_limit
         if query_execution_info.get_non_streaming_order_by():
-            total_item_buffer = query_execution_info.get_top() or\
-                                query_execution_info.get_limit() + query_execution_info.get_offset()
-            if total_item_buffer is None:
+            total_item_buffer = (query_execution_info.get_top() or 0) or \
+                                ((query_execution_info.get_limit() or 0) + (query_execution_info.get_offset() or 0))
+            if total_item_buffer == 0:
                 raise ValueError("Executing a vector search query without TOP or LIMIT can consume many" +
                                  " RUs very fast and have long runtimes. Please ensure you are using one" +
                                  " of the two filters with your vector search query.")

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/execution_dispatcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/execution_dispatcher.py
@@ -121,12 +121,13 @@ def _create_pipelined_execution_context(self, query_execution_info):
 
         # throw exception here for vector search query without limit filter or limit > max_limit
         if query_execution_info.get_non_streaming_order_by():
-            total_item_number = query_execution_info.get_top() or query_execution_info.get_limit()
-            if total_item_number is None:
+            total_item_buffer = (query_execution_info.get_top() or 0) or \
+                                ((query_execution_info.get_limit() or 0) + (query_execution_info.get_offset() or 0))
+            if total_item_buffer == 0:
                 raise ValueError("Executing a vector search query without TOP or LIMIT can consume many" +
                                  " RUs very fast and have long runtimes. Please ensure you are using one" +
                                  " of the two filters with your vector search query.")
-            if total_item_number > os.environ.get('AZURE_COSMOS_MAX_ITEM_BUFFER_VECTOR_SEARCH', 50000):
+            if total_item_buffer > os.environ.get('AZURE_COSMOS_MAX_ITEM_BUFFER_VECTOR_SEARCH', 50000):
                 raise ValueError("Executing a vector search query with more items than the max is not allowed." +
                                  "Please ensure you are using a limit smaller than the max, or change the max.")
             execution_context_aggregator =\

diff --git a/sdk/cosmos/azure-cosmos/test/test_query_vector_similarity.py b/sdk/cosmos/azure-cosmos/test/test_query_vector_similarity.py
@@ -210,10 +210,10 @@ def test_vector_query_pagination(self):
                                                                              max_item_count=3)
         all_fetched_res = []
         count = 0
-        for page in query_iterable.by_page():
-            fetched_res = list(page)
-            all_fetched_res.extend(fetched_res)
+        item_pages = query_iterable.by_page()
+        for items in item_pages:
             count += 1
+            all_fetched_res.extend(list(items))
         assert count == 3
         assert len(all_fetched_res) == 8
         verify_ordering(all_fetched_res, "cosine")

diff --git a/sdk/cosmos/azure-cosmos/test/test_query_vector_similarity_async.py b/sdk/cosmos/azure-cosmos/test/test_query_vector_similarity_async.py
@@ -25,7 +25,7 @@ def verify_ordering(item_list, distance_function):
             assert item_list[i]["SimilarityScore"] >= item_list[i + 1]["SimilarityScore"]
 
 
-class TestVectorSimilarityQueryAsync(unittest.TestCase):
+class TestVectorSimilarityQueryAsync(unittest.IsolatedAsyncioTestCase):
     """Test to check vector similarity queries behavior."""
 
     created_db: DatabaseProxy = None
@@ -182,7 +182,7 @@ async def test_ordering_distances_async(self):
             # disk_ann_list = [item async for item in self.created_diskANN_dotproduct_container.query_items(query=vanilla_query)]
             # verify_ordering(disk_ann_list, "dotproduct")
 
-    async def test_vector_query_pagination(self):
+    async def test_vector_query_pagination_async(self):
         # load up previously calculated embedding for the given string
         vector_string = vector_test_data.get_embedding_string("I am having a wonderful day.")
 
@@ -194,15 +194,15 @@ async def test_vector_query_pagination(self):
                                                                              max_item_count=3)
         all_fetched_res = []
         count = 0
-        pages = query_iterable.by_page()
-        async for items in await pages.__anext__():
+        item_pages = query_iterable.by_page()
+        async for items in item_pages:
             count += 1
-            all_fetched_res.extend(items)
-        assert count >= 3
+            all_fetched_res.extend([item async for item in items])
+        assert count == 3
         assert len(all_fetched_res) == 8
         verify_ordering(all_fetched_res, "cosine")
 
-    async def test_vector_query_large_data(self):
+    async def test_vector_query_large_data_async(self):
         # test different limit queries on a larger data set
         embedding_value = 0.0001
         for i in range(2000):