From 1cc9145c5ddbb940df0cc52b561ed6a123465d53 Mon Sep 17 00:00:00 2001 From: Annie Liang <64233642+xinlian12@users.noreply.github.com> Date: Fri, 4 Oct 2024 08:29:50 -0700 Subject: [PATCH] addFeedRangesAndUseFeedRangeInQueryChangeFeed (#37687) * Add getFeedRanges API * Add feedRange support in query changeFeed Co-authored-by: annie-mac --- sdk/cosmos/azure-cosmos/CHANGELOG.md | 2 + .../azure-cosmos/azure/cosmos/__init__.py | 2 + sdk/cosmos/azure-cosmos/azure/cosmos/_base.py | 19 +- .../azure/cosmos/_change_feed/__init__.py | 20 + .../azure/cosmos/_change_feed/aio/__init__.py | 20 + .../_change_feed/aio/change_feed_fetcher.py | 205 +++++++++ .../_change_feed/aio/change_feed_iterable.py | 166 +++++++ .../_change_feed/change_feed_fetcher.py | 196 +++++++++ .../_change_feed/change_feed_iterable.py | 159 +++++++ .../_change_feed/change_feed_start_from.py | 199 +++++++++ .../cosmos/_change_feed/change_feed_state.py | 415 ++++++++++++++++++ .../composite_continuation_token.py | 72 +++ ...feed_range_composite_continuation_token.py | 176 ++++++++ .../_change_feed/feed_range_internal.py | 132 ++++++ .../azure/cosmos/_cosmos_client_connection.py | 45 +- .../aio/base_execution_context.py | 17 +- .../base_execution_context.py | 16 +- .../azure-cosmos/azure/cosmos/_feed_range.py | 70 +++ .../_routing/aio/routing_map_provider.py | 14 +- .../cosmos/_routing/routing_map_provider.py | 9 +- .../azure/cosmos/_routing/routing_range.py | 71 +++ .../azure/cosmos/aio/_container.py | 273 ++++++++++-- .../aio/_cosmos_client_connection_async.py | 11 +- .../azure-cosmos/azure/cosmos/container.py | 305 ++++++++++--- .../azure-cosmos/azure/cosmos/exceptions.py | 7 +- .../azure/cosmos/partition_key.py | 25 +- sdk/cosmos/azure-cosmos/samples/examples.py | 21 +- .../azure-cosmos/samples/examples_async.py | 23 + .../azure-cosmos/test/test_change_feed.py | 256 +++++++++++ .../test/test_change_feed_async.py | 280 ++++++++++++ .../test/test_change_feed_split.py | 81 ++++ 
.../test/test_change_feed_split_async.py | 94 ++++ .../test/test_container_properties_cache.py | 2 +- .../test_container_properties_cache_async.py | 2 +- sdk/cosmos/azure-cosmos/test/test_query.py | 290 +----------- .../azure-cosmos/test/test_query_async.py | 328 +------------- 36 files changed, 3230 insertions(+), 793 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/__init__.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/__init__.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/composite_continuation_token.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_internal.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py create mode 100644 sdk/cosmos/azure-cosmos/test/test_change_feed.py create mode 100644 sdk/cosmos/azure-cosmos/test/test_change_feed_async.py create mode 100644 sdk/cosmos/azure-cosmos/test/test_change_feed_split.py create mode 100644 sdk/cosmos/azure-cosmos/test/test_change_feed_split_async.py diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index d1276edab75a..df20a43c36bf 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -5,6 +5,8 @@ #### Features Added * Added Retry 
Policy for Container Recreate in the Python SDK. See [PR 36043](https://github.com/Azure/azure-sdk-for-python/pull/36043) * Added option to disable write payload on writes. See [PR 37365](https://github.com/Azure/azure-sdk-for-python/pull/37365) +* Added get feed ranges API. See [PR 37687](https://github.com/Azure/azure-sdk-for-python/pull/37687) +* Added feed range support in `query_items_change_feed`. See [PR 37687](https://github.com/Azure/azure-sdk-for-python/pull/37687) #### Breaking Changes diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/__init__.py b/sdk/cosmos/azure-cosmos/azure/cosmos/__init__.py index 6565ebed8c89..b1e3d8bf2a30 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/__init__.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/__init__.py @@ -42,6 +42,7 @@ ) from .partition_key import PartitionKey from .permission import Permission +from ._feed_range import FeedRange __all__ = ( "CosmosClient", @@ -64,5 +65,6 @@ "TriggerType", "ConnectionRetryPolicy", "ThroughputProperties", + "FeedRange" ) __version__ = VERSION diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py index 050de69c46e7..37f9a8fe0c60 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py @@ -284,23 +284,8 @@ def GetHeaders( # pylint: disable=too-many-statements,too-many-branches if options.get("disableRUPerMinuteUsage"): headers[http_constants.HttpHeaders.DisableRUPerMinuteUsage] = options["disableRUPerMinuteUsage"] - if options.get("changeFeed") is True: - # On REST level, change feed is using IfNoneMatch/ETag instead of continuation. 
- if_none_match_value = None - if options.get("continuation"): - if_none_match_value = options["continuation"] - elif options.get("isStartFromBeginning") and not options["isStartFromBeginning"]: - if_none_match_value = "*" - elif options.get("startTime"): - start_time = options.get("startTime") - headers[http_constants.HttpHeaders.IfModified_since] = start_time - if if_none_match_value: - headers[http_constants.HttpHeaders.IfNoneMatch] = if_none_match_value - - headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue - else: - if options.get("continuation"): - headers[http_constants.HttpHeaders.Continuation] = options["continuation"] + if options.get("continuation"): + headers[http_constants.HttpHeaders.Continuation] = options["continuation"] if options.get("populatePartitionKeyRangeStatistics"): headers[http_constants.HttpHeaders.PopulatePartitionKeyRangeStatistics] = options[ diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/__init__.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/__init__.py new file mode 100644 index 000000000000..f5373937e446 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/__init__.py @@ -0,0 +1,20 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/__init__.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/__init__.py new file mode 100644 index 000000000000..f5373937e446 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/__init__.py @@ -0,0 +1,20 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py new file mode 100644 index 000000000000..d997360e4c41 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py @@ -0,0 +1,205 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for processing change feed implementation in the Azure Cosmos +database service. 
+""" +import base64 +import json +from abc import ABC, abstractmethod +from typing import Dict, Any, List, Callable, Tuple, Awaitable, cast + +from azure.cosmos import http_constants, exceptions +from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromType +from azure.cosmos._change_feed.change_feed_state import ChangeFeedStateV2, ChangeFeedStateVersion +from azure.cosmos.aio import _retry_utility_async +from azure.cosmos.exceptions import CosmosHttpResponseError + +# pylint: disable=protected-access + +class ChangeFeedFetcher(ABC): + + @abstractmethod + async def fetch_next_block(self) -> List[Dict[str, Any]]: + pass + +class ChangeFeedFetcherV1(ChangeFeedFetcher): + """Internal class for change feed fetch v1 implementation. + This is used when partition key range id is used or when the supplied continuation token is in just simple etag. + Please note v1 does not support split or merge. + + """ + def __init__( + self, + client, + resource_link: str, + feed_options: Dict[str, Any], + fetch_function: Callable[[Dict[str, Any]], Awaitable[Tuple[List[Dict[str, Any]], Dict[str, Any]]]] + ) -> None: + + self._client = client + self._feed_options = feed_options + + self._change_feed_state = self._feed_options.pop("changeFeedState") + if self._change_feed_state.version != ChangeFeedStateVersion.V1: + raise ValueError(f"ChangeFeedFetcherV1 can not handle change feed state version" + f" {type(self._change_feed_state)}") + + self._resource_link = resource_link + self._fetch_function = fetch_function + + async def fetch_next_block(self) -> List[Dict[str, Any]]: + """Returns a block of results. + + :return: List of results. 
+ :rtype: list + """ + async def callback(): + return await self.fetch_change_feed_items() + + return await _retry_utility_async.ExecuteAsync(self._client, self._client._global_endpoint_manager, callback) + + async def fetch_change_feed_items(self) -> List[Dict[str, Any]]: + self._feed_options["changeFeedState"] = self._change_feed_state + + self._change_feed_state.populate_feed_options(self._feed_options) + is_s_time_first_fetch = self._change_feed_state._continuation is None + while True: + (fetched_items, response_headers) = await self._fetch_function(self._feed_options) + continuation_key = http_constants.HttpHeaders.ETag + # In change feed queries, the continuation token is always populated. The hasNext() test is whether + # there is any items in the response or not. + self._change_feed_state.apply_server_response_continuation( + cast(str, response_headers.get(continuation_key)), + bool(fetched_items)) + + if fetched_items: + break + + # When processing from point in time, there will be no initial results being returned, + # so we will retry with the new continuation token again + if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME + and is_s_time_first_fetch): + is_s_time_first_fetch = False + else: + break + return fetched_items + + +class ChangeFeedFetcherV2(object): + """Internal class for change feed fetch v2 implementation. 
+ """ + + def __init__( + self, + client, + resource_link: str, + feed_options: Dict[str, Any], + fetch_function: Callable[[Dict[str, Any]], Awaitable[Tuple[List[Dict[str, Any]], Dict[str, Any]]]] + ) -> None: + + self._client = client + self._feed_options = feed_options + + self._change_feed_state: ChangeFeedStateV2 = self._feed_options.pop("changeFeedState") + if self._change_feed_state.version != ChangeFeedStateVersion.V2: + raise ValueError(f"ChangeFeedFetcherV2 can not handle change feed state version " + f"{type(self._change_feed_state.version)}") + + self._resource_link = resource_link + self._fetch_function = fetch_function + + async def fetch_next_block(self) -> List[Dict[str, Any]]: + """Returns a block of results. + + :return: List of results. + :rtype: list + """ + + async def callback(): + return await self.fetch_change_feed_items() + + try: + return await _retry_utility_async.ExecuteAsync( + self._client, + self._client._global_endpoint_manager, + callback) + except CosmosHttpResponseError as e: + if exceptions._partition_range_is_gone(e) or exceptions._is_partition_split_or_merge(e): + # refresh change feed state + await self._change_feed_state.handle_feed_range_gone_async( + self._client._routing_map_provider, + self._resource_link) + else: + raise e + + return await self.fetch_next_block() + + async def fetch_change_feed_items(self) -> List[Dict[str, Any]]: + self._feed_options["changeFeedState"] = self._change_feed_state + + self._change_feed_state.populate_feed_options(self._feed_options) + + is_s_time_first_fetch = True + while True: + (fetched_items, response_headers) = await self._fetch_function(self._feed_options) + + continuation_key = http_constants.HttpHeaders.ETag + # In change feed queries, the continuation token is always populated. The hasNext() test is whether + # there is any items in the response or not. 
+ + self._change_feed_state.apply_server_response_continuation( + cast(str, response_headers.get(continuation_key)), + bool(fetched_items)) + + if fetched_items: + self._change_feed_state._continuation._move_to_next_token() + response_headers[continuation_key] = self._get_base64_encoded_continuation() + break + + # when there is no items being returned, we will decide to retry based on: + # 1. When processing from point in time, there will be no initial results being returned, + # so we will retry with the new continuation token + # 2. if the feed range of the changeFeedState span multiple physical partitions + # then we will read from the next feed range until we have looped through all physical partitions + if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME + and is_s_time_first_fetch): + response_headers[continuation_key] = self._get_base64_encoded_continuation() + is_s_time_first_fetch = False + should_retry = True + else: + self._change_feed_state._continuation._move_to_next_token() + response_headers[continuation_key] = self._get_base64_encoded_continuation() + should_retry = self._change_feed_state.should_retry_on_not_modified_response() + is_s_time_first_fetch = False + + if not should_retry: + break + + return fetched_items + + def _get_base64_encoded_continuation(self) -> str: + continuation_json = json.dumps(self._change_feed_state.to_dict()) + json_bytes = continuation_json.encode('utf-8') + # Encode the bytes to a Base64 string + base64_bytes = base64.b64encode(json_bytes) + # Convert the Base64 bytes to a string + return base64_bytes.decode('utf-8') diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py new file mode 100644 index 000000000000..3f73050dfc7a --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py @@ -0,0 +1,166 @@ +# The MIT License 
(MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Iterable change feed results in the Azure Cosmos database service. +""" +from typing import Dict, Any, Optional, Callable, Tuple, List, Awaitable, Union + +from azure.core.async_paging import AsyncPageIterator + +from azure.cosmos._change_feed.aio.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 +from azure.cosmos._change_feed.change_feed_state import ChangeFeedState, ChangeFeedStateVersion + + +# pylint: disable=protected-access + +class ChangeFeedIterable(AsyncPageIterator): + """Represents an iterable object of the change feed results. + + ChangeFeedIterable is a wrapper for change feed execution. 
+ """ + + def __init__( + self, + client, + options: Dict[str, Any], + fetch_function=Optional[Callable[[Dict[str, Any]], Awaitable[Tuple[List[Dict[str, Any]], Dict[str, Any]]]]], + collection_link=Optional[str], + continuation_token=Optional[str], + ) -> None: + """Instantiates a ChangeFeedIterable for non-client side partitioning queries. + + :param CosmosClient client: Instance of document client. + :param dict options: The request options for the request. + :param fetch_function: The fetch function. + :param collection_link: The collection resource link. + :param continuation_token: The continuation token passed in from by_page + """ + + self._client = client + self.retry_options = client.connection_policy.RetryOptions + self._options = options + self._fetch_function = fetch_function + self._collection_link = collection_link + self._change_feed_fetcher: Optional[Union[ChangeFeedFetcherV1, ChangeFeedFetcherV2]] = None + + if self._options.get("changeFeedStateContext") is None: + raise ValueError("Missing changeFeedStateContext in feed options") + + change_feed_state_context = self._options.pop("changeFeedStateContext") + + continuation = continuation_token if continuation_token is not None\ + else change_feed_state_context.pop("continuation", None) + + # analysis and validate continuation token + # there are two types of continuation token we support currently: + # v1 version: the continuation token would just be the _etag, + # which is being returned when customer is using partition_key_range_id, + # which is under deprecation and does not support split/merge + # v2 version: the continuation token will be base64 encoded composition token + # which includes full change feed state + if continuation is not None: + if continuation.isdigit() or continuation.strip('\'"').isdigit(): + change_feed_state_context["continuationPkRangeId"] = continuation + else: + change_feed_state_context["continuationFeedRange"] = continuation + + 
self._validate_change_feed_state_context(change_feed_state_context) + self._options["changeFeedStateContext"] = change_feed_state_context + + super(ChangeFeedIterable, self).__init__( + self._fetch_next, + self._unpack, # type: ignore[arg-type] + continuation_token=continuation_token) + + async def _unpack( + self, + block: List[Dict[str, Any]] + ) -> Tuple[Optional[str], List[Dict[str, Any]]]: + continuation: Optional[str] = None + if self._client.last_response_headers: + continuation = self._client.last_response_headers.get('etag') + + if block: + self._did_a_call_already = False + return continuation, block + + async def _fetch_next(self, *args) -> List[Dict[str, Any]]: # pylint: disable=unused-argument + """Return a block of results with respecting retry policy. + + :param Any args: + :return: List of results. + :rtype: list + """ + if self._change_feed_fetcher is None: + await self._initialize_change_feed_fetcher() + + assert self._change_feed_fetcher is not None + block = await self._change_feed_fetcher.fetch_next_block() + if not block: + raise StopAsyncIteration + return block + + async def _initialize_change_feed_fetcher(self) -> None: + change_feed_state_context = self._options.pop("changeFeedStateContext") + conn_properties = await self._options.pop("containerProperties") + if change_feed_state_context.get("partitionKey"): + change_feed_state_context["partitionKey"] = await change_feed_state_context.pop("partitionKey") + change_feed_state_context["partitionKeyFeedRange"] =\ + await change_feed_state_context.pop("partitionKeyFeedRange") + + change_feed_state =\ + ChangeFeedState.from_json(self._collection_link, conn_properties["_rid"], change_feed_state_context) + self._options["changeFeedState"] = change_feed_state + + if change_feed_state.version == ChangeFeedStateVersion.V1: + self._change_feed_fetcher = ChangeFeedFetcherV1( + self._client, + self._collection_link, + self._options, + self._fetch_function + ) + else: + self._change_feed_fetcher = 
ChangeFeedFetcherV2( + self._client, + self._collection_link, + self._options, + self._fetch_function + ) + + def _validate_change_feed_state_context(self, change_feed_state_context: Dict[str, Any]) -> None: + + if change_feed_state_context.get("continuationPkRangeId") is not None: + # if continuation token is in v1 format, throw exception if feed_range is set + if change_feed_state_context.get("feedRange") is not None: + raise ValueError("feed_range and continuation are incompatible") + elif change_feed_state_context.get("continuationFeedRange") is not None: + # if continuation token is in v2 format, since the token itself contains the full change feed state + # so we will ignore other parameters (including incompatible parameters) if they passed in + pass + else: + # validation when no continuation is passed + exclusive_keys = ["partitionKeyRangeId", "partitionKey", "feedRange"] + count = sum(1 for key in exclusive_keys if + key in change_feed_state_context and change_feed_state_context[key] is not None) + if count > 1: + raise ValueError( + "partition_key_range_id, partition_key, feed_range are exclusive parameters," + " please only set one of them") diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py new file mode 100644 index 000000000000..c3ff6472af28 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py @@ -0,0 +1,196 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the 
following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for processing change feed implementation in the Azure Cosmos +database service. +""" +import base64 +import json +from abc import ABC, abstractmethod +from typing import Dict, Any, List, Callable, Tuple, cast + +from azure.cosmos import _retry_utility, http_constants, exceptions +from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromType +from azure.cosmos._change_feed.change_feed_state import ChangeFeedStateV1, ChangeFeedStateV2, ChangeFeedStateVersion +from azure.cosmos.exceptions import CosmosHttpResponseError + +# pylint: disable=protected-access + +class ChangeFeedFetcher(ABC): + + @abstractmethod + def fetch_next_block(self): + pass + +class ChangeFeedFetcherV1(ChangeFeedFetcher): + """Internal class for change feed fetch v1 implementation. + This is used when partition key range id is used or when the supplied continuation token is in just simple etag. + Please note v1 does not support split or merge. 
+ + """ + def __init__( + self, + client, + resource_link: str, + feed_options: Dict[str, Any], + fetch_function: Callable[[Dict[str, Any]], Tuple[List[Dict[str, Any]], Dict[str, Any]]] + ) -> None: + + self._client = client + self._feed_options = feed_options + + self._change_feed_state: ChangeFeedStateV1 = self._feed_options.pop("changeFeedState") + if self._change_feed_state.version != ChangeFeedStateVersion.V1: + raise ValueError(f"ChangeFeedFetcherV1 can not handle change feed state version" + f" {type(self._change_feed_state)}") + + self._resource_link = resource_link + self._fetch_function = fetch_function + + def fetch_next_block(self) -> List[Dict[str, Any]]: + """Returns a block of results. + + :return: List of results. + :rtype: list + """ + def callback(): + return self.fetch_change_feed_items() + + return _retry_utility.Execute(self._client, self._client._global_endpoint_manager, callback) + + def fetch_change_feed_items(self) -> List[Dict[str, Any]]: + self._feed_options["changeFeedState"] = self._change_feed_state + + self._change_feed_state.populate_feed_options(self._feed_options) + is_s_time_first_fetch = self._change_feed_state._continuation is None + while True: + (fetched_items, response_headers) = self._fetch_function(self._feed_options) + continuation_key = http_constants.HttpHeaders.ETag + # In change feed queries, the continuation token is always populated. The hasNext() test is whether + # there is any items in the response or not. 
+ self._change_feed_state.apply_server_response_continuation( + cast(str, response_headers.get(continuation_key)), + bool(fetched_items)) + + if fetched_items: + break + + # When processing from point in time, there will be no initial results being returned, + # so we will retry with the new continuation token again + if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME + and is_s_time_first_fetch): + is_s_time_first_fetch = False + else: + break + return fetched_items + + +class ChangeFeedFetcherV2(object): + """Internal class for change feed fetch v2 implementation. + """ + + def __init__( + self, + client, + resource_link: str, + feed_options: Dict[str, Any], + fetch_function: Callable[[Dict[str, Any]], Tuple[List[Dict[str, Any]], Dict[str, Any]]]): + + self._client = client + self._feed_options = feed_options + + self._change_feed_state: ChangeFeedStateV2 = self._feed_options.pop("changeFeedState") + if self._change_feed_state.version != ChangeFeedStateVersion.V2: + raise ValueError(f"ChangeFeedFetcherV2 can not handle change feed state version " + f"{type(self._change_feed_state)}") + + self._resource_link = resource_link + self._fetch_function = fetch_function + + def fetch_next_block(self) -> List[Dict[str, Any]]: + """Returns a block of results. + + :return: List of results. 
+ :rtype: list + """ + + def callback(): + return self.fetch_change_feed_items() + + try: + return _retry_utility.Execute(self._client, self._client._global_endpoint_manager, callback) + except CosmosHttpResponseError as e: + if exceptions._partition_range_is_gone(e) or exceptions._is_partition_split_or_merge(e): + # refresh change feed state + self._change_feed_state.handle_feed_range_gone(self._client._routing_map_provider, self._resource_link) + else: + raise e + + return self.fetch_next_block() + + def fetch_change_feed_items(self) -> List[Dict[str, Any]]: + self._feed_options["changeFeedState"] = self._change_feed_state + + self._change_feed_state.populate_feed_options(self._feed_options) + + is_s_time_first_fetch = self._change_feed_state._continuation.current_token.token is None + while True: + (fetched_items, response_headers) = self._fetch_function(self._feed_options) + + continuation_key = http_constants.HttpHeaders.ETag + # In change feed queries, the continuation token is always populated. + self._change_feed_state.apply_server_response_continuation( + cast(str, response_headers.get(continuation_key)), + bool(fetched_items)) + + if fetched_items: + self._change_feed_state._continuation._move_to_next_token() + response_headers[continuation_key] = self._get_base64_encoded_continuation() + break + + # when there is no items being returned, we will decide to retry based on: + # 1. When processing from point in time, there will be no initial results being returned, + # so we will retry with the new continuation token + # 2. 
if the feed range of the changeFeedState span multiple physical partitions + # then we will read from the next feed range until we have looped through all physical partitions + if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME + and is_s_time_first_fetch): + response_headers[continuation_key] = self._get_base64_encoded_continuation() + is_s_time_first_fetch = False + should_retry = True + else: + self._change_feed_state._continuation._move_to_next_token() + response_headers[continuation_key] = self._get_base64_encoded_continuation() + should_retry = self._change_feed_state.should_retry_on_not_modified_response() + is_s_time_first_fetch = False + + if not should_retry: + break + + return fetched_items + + def _get_base64_encoded_continuation(self) -> str: + continuation_json = json.dumps(self._change_feed_state.to_dict()) + json_bytes = continuation_json.encode('utf-8') + # Encode the bytes to a Base64 string + base64_bytes = base64.b64encode(json_bytes) + # Convert the Base64 bytes to a string + return base64_bytes.decode('utf-8') diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py new file mode 100644 index 000000000000..bd37b60926cf --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py @@ -0,0 +1,159 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this 
class ChangeFeedIterable(PageIterator):
    """Represents an iterable object of the change feed results.

    ChangeFeedIterable is a wrapper for change feed execution. It normalizes the
    ``changeFeedStateContext`` found in the request options, lazily builds the
    matching fetcher (V1 or V2) on the first page request and then delegates
    page retrieval to it.
    """

    def __init__(
        self,
        client,
        options: Dict[str, Any],
        fetch_function: Optional[Callable[[Dict[str, Any]], Tuple[List[Dict[str, Any]], Dict[str, Any]]]] = None,
        collection_link: Optional[str] = None,
        continuation_token: Optional[str] = None,
    ) -> None:
        """Instantiates a ChangeFeedIterable for non-client side partitioning queries.

        :param CosmosClient client: Instance of document client.
        :param dict options: The request options for the request. Must contain
            a ``changeFeedStateContext`` entry.
        :param fetch_function: The fetch function.
        :param collection_link: The collection resource link.
        :param continuation_token: The continuation token passed in from by_page
        :raises ValueError: If ``changeFeedStateContext`` is missing from the options
            or the supplied parameters are mutually incompatible.
        """
        # NOTE: the three optional parameters are real annotations with ``None`` defaults.
        # They were previously written as ``name=Optional[...]``, which made the typing
        # object itself the default value; an omitted continuation_token was therefore
        # "not None" and failed on ``continuation.isdigit()``.

        self._client = client
        self.retry_options = client.connection_policy.RetryOptions
        self._options = options
        self._fetch_function = fetch_function
        self._collection_link = collection_link
        self._change_feed_fetcher: Optional[Union[ChangeFeedFetcherV1, ChangeFeedFetcherV2]] = None

        if self._options.get("changeFeedStateContext") is None:
            raise ValueError("Missing changeFeedStateContext in feed options")

        change_feed_state_context = self._options.pop("changeFeedStateContext")
        # An explicit continuation_token (from by_page) wins over one stored in the context.
        continuation = continuation_token if continuation_token is not None\
            else change_feed_state_context.pop("continuation", None)

        # analyse and validate the continuation token
        # there are two types of continuation token we support currently:
        # v1 version: the continuation token would just be the _etag,
        #   which is being returned when customer is using partition_key_range_id,
        #   which is under deprecation and does not support split/merge
        # v2 version: the continuation token will be base64 encoded composition token
        #   which includes full change feed state
        if continuation is not None:
            if continuation.isdigit() or continuation.strip('\'"').isdigit():
                change_feed_state_context["continuationPkRangeId"] = continuation
            else:
                change_feed_state_context["continuationFeedRange"] = continuation

        self._validate_change_feed_state_context(change_feed_state_context)
        self._options["changeFeedStateContext"] = change_feed_state_context

        super(ChangeFeedIterable, self).__init__(
            self._fetch_next,
            self._unpack, # type: ignore[arg-type]
            continuation_token=continuation_token)

    def _unpack(self, block: List[Dict[str, Any]]) -> Tuple[Optional[str], List[Dict[str, Any]]]:
        """Pair a fetched block with the continuation taken from the last response headers.

        :param block: The items returned by ``_fetch_next``.
        :return: ``(continuation, block)`` where continuation is the ``etag`` entry of the
            client's last response headers, or None when no headers are available.
        :rtype: tuple
        """
        continuation: Optional[str] = None
        if self._client.last_response_headers:
            continuation = self._client.last_response_headers.get('etag')

        if block:
            self._did_a_call_already = False
        return continuation, block

    def _fetch_next(self, *args) -> List[Dict[str, Any]]: # pylint: disable=unused-argument
        """Return a block of results with respecting retry policy.

        :param Any args:
        :return: List of results.
        :rtype: list
        :raises StopIteration: When the fetcher returns an empty block.
        """

        # The fetcher is created on first use.
        if self._change_feed_fetcher is None:
            self._initialize_change_feed_fetcher()

        assert self._change_feed_fetcher is not None
        block = self._change_feed_fetcher.fetch_next_block()
        if not block:
            raise StopIteration
        return block

    def _initialize_change_feed_fetcher(self) -> None:
        """Build the change feed state from the stored context and pick the matching fetcher."""
        change_feed_state_context = self._options.pop("changeFeedStateContext")
        change_feed_state = \
            ChangeFeedState.from_json(
                self._collection_link,
                cast(str, self._options.get("containerRID")),
                change_feed_state_context)

        self._options["changeFeedState"] = change_feed_state

        if change_feed_state.version == ChangeFeedStateVersion.V1:
            self._change_feed_fetcher = ChangeFeedFetcherV1(
                self._client,
                self._collection_link,
                self._options,
                self._fetch_function
            )
        else:
            self._change_feed_fetcher = ChangeFeedFetcherV2(
                self._client,
                self._collection_link,
                self._options,
                self._fetch_function
            )

    def _validate_change_feed_state_context(self, change_feed_state_context: Dict[str, Any]) -> None:
        """Reject incompatible combinations of change feed parameters.

        :param change_feed_state_context: The normalized change feed state context.
        :raises ValueError: If a v1 continuation is combined with ``feedRange``, or if, without
            any continuation, more than one of ``partitionKeyRangeId``, ``partitionKey`` and
            ``feedRange`` is set.
        """

        if change_feed_state_context.get("continuationPkRangeId") is not None:
            # if continuation token is in v1 format, throw exception if feed_range is set
            if change_feed_state_context.get("feedRange") is not None:
                raise ValueError("feed_range and continuation are incompatible")
        elif change_feed_state_context.get("continuationFeedRange") is not None:
            # if continuation token is in v2 format, the token itself contains the full change feed state,
            # so other parameters (including incompatible ones) are ignored if they are passed in
            pass
        else:
            # validation when no continuation is passed
            exclusive_keys = ["partitionKeyRangeId", "partitionKey", "feedRange"]
            count = sum(1 for key in exclusive_keys if change_feed_state_context.get(key) is not None)
            if count > 1:
                raise ValueError(
                    "partition_key_range_id, partition_key, feed_range are exclusive parameters,"
                    " please only set one of them")
class ChangeFeedStartFromType(Enum):
    """The kinds of starting position supported for a change feed read."""
    BEGINNING = "Beginning"
    NOW = "Now"
    LEASE = "Lease"
    POINT_IN_TIME = "PointInTime"


class ChangeFeedStartFromInternal(ABC):
    """Abstract class for change feed start from implementation in the Azure Cosmos database service.
    """

    # Key under which the start-from kind is stored in the serialized form.
    type_property_name = "Type"

    def __init__(self, start_from_type: ChangeFeedStartFromType) -> None:
        # NOTE: despite its name, ``version`` stores the start-from type.
        self.version = start_from_type

    @abstractmethod
    def to_dict(self) -> Dict[str, Any]:
        """Return the JSON-serializable representation of this start-from position."""

    @staticmethod
    def from_start_time(
            start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]]) -> 'ChangeFeedStartFromInternal':
        """Build a start-from position from a user supplied ``start_time``.

        :param start_time: None (treated as "Now"), a datetime, or the case-insensitive
            strings "Now" / "Beginning".
        :return: The matching start-from implementation.
        :raises ValueError: If ``start_time`` is none of the supported values.
        """
        if start_time is None:
            return ChangeFeedStartFromNow()
        if isinstance(start_time, datetime):
            return ChangeFeedStartFromPointInTime(start_time)
        # Only strings can be compared by name; any other type falls through to the
        # ValueError below instead of failing with AttributeError on ``.lower()``.
        if isinstance(start_time, str):
            normalized = start_time.lower()
            if normalized == ChangeFeedStartFromType.NOW.value.lower():
                return ChangeFeedStartFromNow()
            if normalized == ChangeFeedStartFromType.BEGINNING.value.lower():
                return ChangeFeedStartFromBeginning()

        raise ValueError(f"Invalid start_time '{start_time}'")

    @staticmethod
    def from_json(data: Dict[str, Any]) -> 'ChangeFeedStartFromInternal':
        """Rebuild a start-from position from its serialized form.

        :param data: A dictionary as produced by ``to_dict``.
        :return: The matching start-from implementation.
        :raises ValueError: If the type entry is missing or not recognized.
        """
        change_feed_start_from_type = data.get(ChangeFeedStartFromInternal.type_property_name)
        if change_feed_start_from_type is None:
            raise ValueError(f"Invalid start from json [Missing {ChangeFeedStartFromInternal.type_property_name}]")

        if change_feed_start_from_type == ChangeFeedStartFromType.BEGINNING.value:
            return ChangeFeedStartFromBeginning.from_json(data)
        if change_feed_start_from_type == ChangeFeedStartFromType.LEASE.value:
            return ChangeFeedStartFromETagAndFeedRange.from_json(data)
        if change_feed_start_from_type == ChangeFeedStartFromType.NOW.value:
            return ChangeFeedStartFromNow.from_json(data)
        if change_feed_start_from_type == ChangeFeedStartFromType.POINT_IN_TIME.value:
            return ChangeFeedStartFromPointInTime.from_json(data)

        raise ValueError(f"Can not process changeFeedStartFrom for type {change_feed_start_from_type}")

    @abstractmethod
    def populate_request_headers(self, request_headers) -> None:
        """Write the headers that express this start-from position into ``request_headers``."""


class ChangeFeedStartFromBeginning(ChangeFeedStartFromInternal):
    """Class for change feed start from beginning implementation in the Azure Cosmos database service.
    """

    def __init__(self) -> None:
        super().__init__(ChangeFeedStartFromType.BEGINNING)

    def to_dict(self) -> Dict[str, Any]:
        """Return ``{"Type": "Beginning"}``."""
        return {
            self.type_property_name: ChangeFeedStartFromType.BEGINNING.value
        }

    def populate_request_headers(self, request_headers) -> None:
        pass  # no headers need to be set for start from beginning

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'ChangeFeedStartFromBeginning':
        """Return a new instance; ``data`` carries no additional state for this type."""
        return ChangeFeedStartFromBeginning()


class ChangeFeedStartFromETagAndFeedRange(ChangeFeedStartFromInternal):
    """Class for change feed start from etag and feed range implementation in the Azure Cosmos database service.
    """

    _etag_property_name = "Etag"
    _feed_range_property_name = "FeedRange"

    def __init__(self, etag, feed_range) -> None:
        """Create a start-from position for ``feed_range`` resuming at ``etag``.

        :param etag: The ETag to resume from; may be falsy, in which case no header is set.
        :param feed_range: The feed range the ETag belongs to.
        :raises ValueError: If ``feed_range`` is None.
        """
        if feed_range is None:
            raise ValueError("feed_range is missing")

        self._etag = etag
        self._feed_range = feed_range
        super().__init__(ChangeFeedStartFromType.LEASE)

    def to_dict(self) -> Dict[str, Any]:
        """Return the type, ETag and serialized feed range."""
        return {
            self.type_property_name: ChangeFeedStartFromType.LEASE.value,
            self._etag_property_name: self._etag,
            self._feed_range_property_name: self._feed_range.to_dict()
        }

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'ChangeFeedStartFromETagAndFeedRange':
        """Rebuild the instance from its serialized form.

        :raises ValueError: If the ETag or the feed range entry is missing.
        """
        etag = data.get(cls._etag_property_name)
        if etag is None:
            raise ValueError(f"Invalid change feed start from [Missing {cls._etag_property_name}]")

        feed_range_data = data.get(cls._feed_range_property_name)
        if feed_range_data is None:
            raise ValueError(f"Invalid change feed start from [Missing {cls._feed_range_property_name}]")
        feed_range = Range.ParseFromDict(feed_range_data)
        return cls(etag, feed_range)

    def populate_request_headers(self, request_headers) -> None:
        # change feed uses etag as the continuationToken
        if self._etag:
            request_headers[http_constants.HttpHeaders.IfNoneMatch] = self._etag


class ChangeFeedStartFromNow(ChangeFeedStartFromInternal):
    """Class for change feed start from now implementation in the Azure Cosmos database service.
    """

    def __init__(self) -> None:
        super().__init__(ChangeFeedStartFromType.NOW)

    def to_dict(self) -> Dict[str, Any]:
        """Return ``{"Type": "Now"}``."""
        return {
            self.type_property_name: ChangeFeedStartFromType.NOW.value
        }

    def populate_request_headers(self, request_headers) -> None:
        # "Now" is expressed by sending "*" as the If-None-Match value.
        request_headers[http_constants.HttpHeaders.IfNoneMatch] = "*"

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'ChangeFeedStartFromNow':
        """Return a new instance; ``data`` carries no additional state for this type."""
        return ChangeFeedStartFromNow()


class ChangeFeedStartFromPointInTime(ChangeFeedStartFromInternal):
    """Class for change feed start from point in time implementation in the Azure Cosmos database service.
    """

    _point_in_time_ms_property_name = "PointInTimeMs"

    def __init__(self, start_time: datetime) -> None:
        """Create a start-from position at ``start_time``.

        :param datetime start_time: The point in time to start reading from.
        :raises ValueError: If ``start_time`` is None.
        """
        if start_time is None:
            raise ValueError("start_time is missing")

        self._start_time = start_time
        super().__init__(ChangeFeedStartFromType.POINT_IN_TIME)

    def to_dict(self) -> Dict[str, Any]:
        """Return the type and the start time as integer milliseconds since the Unix epoch (UTC)."""
        return {
            self.type_property_name: ChangeFeedStartFromType.POINT_IN_TIME.value,
            self._point_in_time_ms_property_name:
                int(self._start_time.astimezone(timezone.utc).timestamp() * 1000)
        }

    def populate_request_headers(self, request_headers) -> None:
        # The start time is sent as an HTTP date in GMT.
        request_headers[http_constants.HttpHeaders.IfModified_since] =\
            self._start_time.astimezone(timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT')

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'ChangeFeedStartFromPointInTime':
        """Rebuild the instance from its serialized form.

        :param data: A dictionary as produced by ``to_dict``.
        :return: A start-from position at the serialized point in time (UTC).
        :raises ValueError: If the point-in-time entry is missing.
        """
        point_in_time_ms = data.get(cls._point_in_time_ms_property_name)
        if point_in_time_ms is None:
            raise ValueError(f"Invalid change feed start from [Missing {cls._point_in_time_ms_property_name}]")

        # The serialized value is in milliseconds (see to_dict) while fromtimestamp expects
        # seconds; build the datetime directly in UTC so the result does not depend on the
        # local time zone.
        point_in_time = datetime.fromtimestamp(point_in_time_ms / 1000, tz=timezone.utc)
        return ChangeFeedStartFromPointInTime(point_in_time)
/dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py @@ -0,0 +1,415 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for change feed state implementation in the Azure Cosmos +database service. 
class ChangeFeedStateVersion(Enum):
    """Versions of the change feed state; the value is what gets stored under the "v" key."""
    V1 = "v1"
    V2 = "v2"

class ChangeFeedState(ABC):
    """Abstract base for the state that drives a change feed read.

    Concrete states know how to contribute to the feed options and request headers of a
    request and how to absorb the continuation returned by the service.
    """

    # Key under which the state version is stored in a serialized continuation.
    version_property_name = "v"

    def __init__(self, version: ChangeFeedStateVersion) -> None:
        self.version = version

    @abstractmethod
    def populate_feed_options(self, feed_options: Dict[str, Any]) -> None:
        """Write the entries this state contributes into ``feed_options``."""
        pass

    @abstractmethod
    def populate_request_headers(
            self,
            routing_provider: SmartRoutingMapProvider,
            request_headers: Dict[str, Any]) -> None:
        """Write the headers this state contributes into ``request_headers``."""
        pass

    @abstractmethod
    async def populate_request_headers_async(
            self,
            async_routing_provider: AsyncSmartRoutingMapProvider,
            request_headers: Dict[str, Any]) -> None:
        """Asynchronous counterpart of ``populate_request_headers``."""
        pass

    @abstractmethod
    def apply_server_response_continuation(self, continuation: str, has_modified_response: bool) -> None:
        """Record the continuation returned by the service for the last request."""
        pass

    @staticmethod
    def from_json(
            container_link: str,
            container_rid: str,
            change_feed_state_context: Dict[str, Any]) -> 'ChangeFeedState':
        """Build the change feed state described by ``change_feed_state_context``.

        A V1 state is built when ``partitionKeyRangeId`` or ``continuationPkRangeId`` is set.
        Otherwise, when ``continuationFeedRange`` is set, it is base64/JSON decoded and a V2
        state is restored from it. With no continuation at all a fresh V2 state is built.

        :raises ValueError: If the decoded continuation has no version or an unsupported one.
        """

        if (change_feed_state_context.get("partitionKeyRangeId")
                or change_feed_state_context.get("continuationPkRangeId")):
            return ChangeFeedStateV1.from_json(container_link, container_rid, change_feed_state_context)

        if change_feed_state_context.get("continuationFeedRange"):
            # get changeFeedState from continuation
            continuation_json_str = base64.b64decode(change_feed_state_context["continuationFeedRange"]).decode(
                'utf-8')
            continuation_json = json.loads(continuation_json_str)
            version = continuation_json.get(ChangeFeedState.version_property_name)
            if version is None:
                raise ValueError("Invalid base64 encoded continuation string [Missing version]")

            if version == ChangeFeedStateVersion.V2.value:
                return ChangeFeedStateV2.from_continuation(container_link, container_rid, continuation_json)

            raise ValueError("Invalid base64 encoded continuation string [Invalid version]")

        # when there is no continuation token, by default construct ChangeFeedStateV2
        return ChangeFeedStateV2.from_initial_state(container_link, container_rid, change_feed_state_context)

class ChangeFeedStateV1(ChangeFeedState):
    """Change feed state v1 implementation.

    This is used when a partition key range id is used or the continuation is just a simple _etag.
    The continuation is kept as a plain string.
    """

    def __init__(
            self,
            container_link: str,
            container_rid: str,
            change_feed_start_from: ChangeFeedStartFromInternal,
            partition_key_range_id: Optional[str] = None,
            partition_key: Optional[Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined]] = None, # pylint: disable=line-too-long
            continuation: Optional[str] = None) -> None:

        self._container_link = container_link
        self._container_rid = container_rid
        self._change_feed_start_from = change_feed_start_from
        self._partition_key_range_id = partition_key_range_id
        self._partition_key = partition_key
        self._continuation = continuation
        super(ChangeFeedStateV1, self).__init__(ChangeFeedStateVersion.V1)

    @property
    def container_rid(self):
        return self._container_rid

    @classmethod
    def from_json(
            cls,
            container_link: str,
            container_rid: str,
            change_feed_state_context: Dict[str, Any]) -> 'ChangeFeedStateV1':
        """Build a V1 state from the ``startTime``, ``partitionKeyRangeId``, ``partitionKey``
        and ``continuationPkRangeId`` entries of the context."""
        return cls(
            container_link,
            container_rid,
            ChangeFeedStartFromInternal.from_start_time(change_feed_state_context.get("startTime")),
            change_feed_state_context.get("partitionKeyRangeId"),
            change_feed_state_context.get("partitionKey"),
            change_feed_state_context.get("continuationPkRangeId")
        )

    def populate_request_headers(
            self,
            routing_provider: SmartRoutingMapProvider,
            request_headers: Dict[str, Any]) -> None:
        """Set the incremental feed header, the start-from headers and, when a continuation
        is present, overwrite If-None-Match with it. ``routing_provider`` is not used."""
        request_headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue

        self._change_feed_start_from.populate_request_headers(request_headers)
        # Applied after the start-from headers so the continuation takes precedence.
        if self._continuation:
            request_headers[http_constants.HttpHeaders.IfNoneMatch] = self._continuation

    async def populate_request_headers_async(
            self,
            async_routing_provider: AsyncSmartRoutingMapProvider,
            request_headers: Dict[str, Any]) -> None: # pylint: disable=unused-argument
        """Asynchronous counterpart of ``populate_request_headers``; performs the same steps."""

        request_headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue

        self._change_feed_start_from.populate_request_headers(request_headers)
        if self._continuation:
            request_headers[http_constants.HttpHeaders.IfNoneMatch] = self._continuation

    def populate_feed_options(self, feed_options: Dict[str, Any]) -> None:
        """Copy the partition key range id and partition key, when set, into ``feed_options``."""
        if self._partition_key_range_id is not None:
            feed_options["partitionKeyRangeId"] = self._partition_key_range_id
        if self._partition_key is not None:
            feed_options["partitionKey"] = self._partition_key

    def apply_server_response_continuation(self, continuation: str, has_modified_response) -> None:
        """Store ``continuation``; ``has_modified_response`` is not used by the V1 state."""
        self._continuation = continuation

class ChangeFeedStateV2(ChangeFeedState):
    """Change feed state v2 implementation.

    Tracks a feed range together with a ``FeedRangeCompositeContinuation`` and can be
    serialized through ``to_dict``.
    """

    # Keys used in the serialized (dictionary) form of the state.
    container_rid_property_name = "containerRid"
    change_feed_mode_property_name = "mode"
    change_feed_start_from_property_name = "startFrom"
    continuation_property_name = "continuation"

    # TODO: adding change feed mode
    def __init__(
            self,
            container_link: str,
            container_rid: str,
            feed_range: FeedRangeInternal,
            change_feed_start_from: ChangeFeedStartFromInternal,
            continuation: Optional[FeedRangeCompositeContinuation]
    ) -> None:

        self._container_link = container_link
        self._container_rid = container_rid
        self._feed_range = feed_range
        self._change_feed_start_from = change_feed_start_from
        if continuation is None:
            # No continuation yet: start with a single token that covers the normalized
            # feed range and carries no ETag.
            composite_continuation_token_queue: Deque = collections.deque()
            composite_continuation_token_queue.append(
                CompositeContinuationToken(
                    self._feed_range.get_normalized_range(),
                    None))
            self._continuation =\
                FeedRangeCompositeContinuation(
                    self._container_rid,
                    self._feed_range,
                    composite_continuation_token_queue)
        else:
            self._continuation = continuation

        super(ChangeFeedStateV2, self).__init__(ChangeFeedStateVersion.V2)

    @property
    def container_rid(self) -> str :
        return self._container_rid

    def to_dict(self) -> Dict[str, Any]:
        """Return the serializable form of the state; the mode is always "Incremental"."""
        return {
            self.version_property_name: ChangeFeedStateVersion.V2.value,
            self.container_rid_property_name: self._container_rid,
            self.change_feed_mode_property_name: "Incremental",
            self.change_feed_start_from_property_name: self._change_feed_start_from.to_dict(),
            self.continuation_property_name: self._continuation.to_dict() if self._continuation is not None else None
        }

    def populate_request_headers(
            self,
            routing_provider: SmartRoutingMapProvider,
            request_headers: Dict[str, Any]) -> None:
        """Populate the change feed headers for the current continuation token.

        :param routing_provider: Used to look up the partition key ranges overlapping the
            current token's feed range.
        :param request_headers: The header dictionary to update in place.
        :raises CosmosHttpResponseError: A GONE error when the current feed range overlaps
            more than one partition key range.
        """
        request_headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue

        # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time
        # of the documents may not be sequential.
        # So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts.
        # In order to guarantee we always get the documents after customer's point start time,
        # we will need to always pass the start time in the header.
        self._change_feed_start_from.populate_request_headers(request_headers)

        # Once the current token carries an ETag, resume from it; this is applied after the
        # start-from headers above.
        if self._continuation.current_token is not None and self._continuation.current_token.token is not None:
            change_feed_start_from_feed_range_and_etag =\
                ChangeFeedStartFromETagAndFeedRange(
                    self._continuation.current_token.token,
                    self._continuation.current_token.feed_range)
            change_feed_start_from_feed_range_and_etag.populate_request_headers(request_headers)

        # based on the feed range to find the overlapping partition key range id
        over_lapping_ranges =\
            routing_provider.get_overlapping_ranges(
                self._container_link,
                [self._continuation.current_token.feed_range])

        if len(over_lapping_ranges) > 1:
            raise self.get_feed_range_gone_error(over_lapping_ranges)

        # NOTE(review): an empty result would raise IndexError on the next line - confirm the
        # routing provider always returns at least one range here.
        overlapping_feed_range = Range.PartitionKeyRangeToRange(over_lapping_ranges[0])
        if overlapping_feed_range == self._continuation.current_token.feed_range:
            # exactly mapping to one physical partition, only need to set the partitionKeyRangeId
            request_headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"]
        else:
            # the current token feed range spans less than single physical partition
            # for this case, need to set both the partition key range id and epk filter headers
            request_headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"]
            request_headers[
                http_constants.HttpHeaders.StartEpkString] = self._continuation.current_token.feed_range.min
            request_headers[
                http_constants.HttpHeaders.EndEpkString] = self._continuation.current_token.feed_range.max

    async def populate_request_headers_async(
            self,
            async_routing_provider: AsyncSmartRoutingMapProvider,
            request_headers: Dict[str, Any]) -> None:
        """Asynchronous counterpart of ``populate_request_headers``.

        Performs the same steps but awaits the routing provider lookup.

        :raises CosmosHttpResponseError: A GONE error when the current feed range overlaps
            more than one partition key range.
        """
        request_headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue

        # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time
        # of the documents may not be sequential.
        # So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts.
        # In order to guarantee we always get the documents after customer's point start time,
        # we will need to always pass the start time in the header.
        self._change_feed_start_from.populate_request_headers(request_headers)

        if self._continuation.current_token is not None and self._continuation.current_token.token is not None:
            change_feed_start_from_feed_range_and_etag = \
                ChangeFeedStartFromETagAndFeedRange(
                    self._continuation.current_token.token,
                    self._continuation.current_token.feed_range)
            change_feed_start_from_feed_range_and_etag.populate_request_headers(request_headers)

        # based on the feed range to find the overlapping partition key range id
        over_lapping_ranges = \
            await async_routing_provider.get_overlapping_ranges(
                self._container_link,
                [self._continuation.current_token.feed_range])

        if len(over_lapping_ranges) > 1:
            raise self.get_feed_range_gone_error(over_lapping_ranges)

        overlapping_feed_range = Range.PartitionKeyRangeToRange(over_lapping_ranges[0])
        if overlapping_feed_range == self._continuation.current_token.feed_range:
            # exactly mapping to one physical partition, only need to set the partitionKeyRangeId
            request_headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"]
        else:
            # the current token feed range spans less than single physical partition
            # for this case, need to set both the partition key range id and epk filter headers
            request_headers[http_constants.HttpHeaders.PartitionKeyRangeID] = \
                over_lapping_ranges[0]["id"]
            request_headers[http_constants.HttpHeaders.StartEpkString] = \
                self._continuation.current_token.feed_range.min
            request_headers[http_constants.HttpHeaders.EndEpkString] = \
                self._continuation.current_token.feed_range.max

    def populate_feed_options(self, feed_options: Dict[str, Any]) -> None:
        # The V2 state contributes nothing to the feed options.
        pass

    def handle_feed_range_gone(
            self,
            routing_provider: SmartRoutingMapProvider,
            resource_link: str) -> None:
        """Delegate handling of a gone feed range to the composite continuation."""
        self._continuation.handle_feed_range_gone(routing_provider, resource_link)

    async def handle_feed_range_gone_async(
            self,
            routing_provider: AsyncSmartRoutingMapProvider,
            resource_link: str) -> None:
        """Asynchronous counterpart of ``handle_feed_range_gone``."""
        await self._continuation.handle_feed_range_gone_async(routing_provider, resource_link)

    def apply_server_response_continuation(self, continuation: str, has_modified_response: bool) -> None:
        """Forward the server continuation to the composite continuation."""
        self._continuation.apply_server_response_continuation(continuation, has_modified_response)

    def should_retry_on_not_modified_response(self) -> bool:
        """Return the composite continuation's decision on retrying after an empty response."""
        return self._continuation.should_retry_on_not_modified_response()

    def apply_not_modified_response(self) -> None:
        """Forward a not-modified response to the composite continuation."""
        self._continuation.apply_not_modified_response()

    def get_feed_range_gone_error(self, over_lapping_ranges: List[Dict[str, Any]]) -> CosmosHttpResponseError:
        """Build (without raising) a GONE / PARTITION_KEY_RANGE_GONE error.

        :param over_lapping_ranges: The partition key ranges the current feed range overlaps;
            their ids are listed in the error message.
        :return: The error for the caller to raise.
        """
        formatted_message =\
            (f"Status code: {StatusCodes.GONE} "
             f"Sub-status: {SubStatusCodes.PARTITION_KEY_RANGE_GONE}. "
             f"Range {self._continuation.current_token.feed_range}"
             f" spans {len(over_lapping_ranges)} physical partitions:"
             f" {[child_range['id'] for child_range in over_lapping_ranges]}")

        response_error = CosmosHttpResponseError(status_code=StatusCodes.GONE, message=formatted_message)
        response_error.sub_status = SubStatusCodes.PARTITION_KEY_RANGE_GONE
        return response_error

    @classmethod
    def from_continuation(
            cls,
            container_link: str,
            container_rid: str,
            continuation_json: Dict[str, Any]) -> 'ChangeFeedStateV2':
        """Restore a V2 state from a decoded continuation.

        :param continuation_json: The decoded continuation, in the shape produced by ``to_dict``.
        :raises ValueError: If the container rid, start-from or continuation entry is missing,
            or the container rid differs from ``container_rid``.
        """

        container_rid_from_continuation = continuation_json.get(ChangeFeedStateV2.container_rid_property_name)
        if container_rid_from_continuation is None:
            raise ValueError(f"Invalid continuation: [Missing {ChangeFeedStateV2.container_rid_property_name}]")
        if container_rid_from_continuation != container_rid:
            raise ValueError("Invalid continuation: [Mismatch collection rid]")

        change_feed_start_from_data = continuation_json.get(ChangeFeedStateV2.change_feed_start_from_property_name)
        if change_feed_start_from_data is None:
            raise ValueError(f"Invalid continuation:"
                             f" [Missing {ChangeFeedStateV2.change_feed_start_from_property_name}]")
        change_feed_start_from = ChangeFeedStartFromInternal.from_json(change_feed_start_from_data)

        continuation_data = continuation_json.get(ChangeFeedStateV2.continuation_property_name)
        if continuation_data is None:
            raise ValueError(f"Invalid continuation: [Missing {ChangeFeedStateV2.continuation_property_name}]")
        continuation = FeedRangeCompositeContinuation.from_json(continuation_data)
        # The feed range is taken from the restored continuation.
        return ChangeFeedStateV2(
            container_link=container_link,
            container_rid=container_rid,
            feed_range=continuation.feed_range,
            change_feed_start_from=change_feed_start_from,
            continuation=continuation)

    @classmethod
    def from_initial_state(
            cls,
            container_link: str,
            collection_rid: str,
            change_feed_state_context: Dict[str, Any]) -> 'ChangeFeedStateV2':
        """Build a fresh V2 state (no continuation) from the context.

        The feed range is ``feedRange`` when set, otherwise a partition-key feed range built
        from ``partitionKey`` and ``partitionKeyFeedRange``, otherwise the range "" to "FF".

        :raises ValueError: If ``partitionKey`` is set without ``partitionKeyFeedRange``.
        """

        feed_range: Optional[FeedRangeInternal] = None
        if change_feed_state_context.get("feedRange"):
            feed_range = change_feed_state_context.get("feedRange")
        elif change_feed_state_context.get("partitionKey"):
            if change_feed_state_context.get("partitionKeyFeedRange"):
                feed_range =\
                    FeedRangeInternalPartitionKey(
                        change_feed_state_context["partitionKey"],
                        change_feed_state_context["partitionKeyFeedRange"])
            else:
                raise ValueError("partitionKey is in the changeFeedStateContext, but missing partitionKeyFeedRange")
        else:
            # default to full range
            # (presumably the two flags mean min-inclusive / max-exclusive - verify against Range)
            feed_range = FeedRangeInternalEpk(
                Range(
                "",
                "FF",
                True,
                False)
            )

        change_feed_start_from = (
            ChangeFeedStartFromInternal.from_start_time(change_feed_state_context.get("startTime")))

        if feed_range is not None:
            return cls(
                container_link=container_link,
                container_rid=collection_rid,
                feed_range=feed_range,
                change_feed_start_from=change_feed_start_from,
                continuation=None)
        raise RuntimeError("feed_range is empty")
/dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/composite_continuation_token.py @@ -0,0 +1,72 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for change feed composite continuation token in the Azure Cosmos +database service. 
+""" +from typing import Optional, Dict, Any + +from azure.cosmos._routing.routing_range import Range + + +class CompositeContinuationToken: + token_property_name = "token" + feed_range_property_name = "range" + + def __init__(self, feed_range: Range, token: Optional[str] = None) -> None: + if feed_range is None: + raise ValueError("Missing required parameter feed_range") + + self._token = token + self._feed_range = feed_range + + def to_dict(self) -> Dict[str, Any]: + return { + self.token_property_name: self._token, + self.feed_range_property_name: self.feed_range.to_dict() + } + + @property + def feed_range(self) -> Range: + return self._feed_range + + @property + def token(self) -> Optional[str]: + return self._token + + def update_token(self, etag) -> None: + self._token = etag + + @classmethod + def from_json(cls, data) -> 'CompositeContinuationToken': + token = data.get(cls.token_property_name) + if token is None: + raise ValueError(f"Invalid composite token [Missing {cls.token_property_name}]") + + feed_range_data = data.get(cls.feed_range_property_name) + if feed_range_data is None: + raise ValueError(f"Invalid composite token [Missing {cls.feed_range_property_name}]") + + feed_range = Range.ParseFromDict(feed_range_data) + return cls(feed_range=feed_range, token=token) + + def __repr__(self): + return f"CompositeContinuationToken(token={self.token}, range={self.feed_range})" diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py new file mode 100644 index 000000000000..8f87ccfa194a --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py @@ -0,0 +1,176 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files 
(the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for change feed continuation token by feed range in the Azure Cosmos +database service. 
+""" +from collections import deque +from typing import Any, Deque, Dict, Optional + +from azure.cosmos._change_feed.composite_continuation_token import CompositeContinuationToken +from azure.cosmos._change_feed.feed_range_internal import (FeedRangeInternal, FeedRangeInternalEpk, + FeedRangeInternalPartitionKey) +from azure.cosmos._routing.routing_map_provider import SmartRoutingMapProvider +from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider as AsyncSmartRoutingMapProvider +from azure.cosmos._routing.routing_range import Range + +class FeedRangeCompositeContinuation: + _version_property_name = "v" + _container_rid_property_name = "rid" + _continuation_property_name = "continuation" + + def __init__( + self, + container_rid: str, + feed_range: FeedRangeInternal, + continuation: Deque[CompositeContinuationToken]) -> None: + if container_rid is None: + raise ValueError("container_rid is missing") + + self._container_rid = container_rid + self._feed_range = feed_range + self._continuation = continuation + self._current_token = self._continuation[0] + self._initial_no_result_range: Optional[Range] = None + + @property + def current_token(self) -> CompositeContinuationToken: + return self._current_token + + def to_dict(self) -> Dict[str, Any]: + json_data = { + self._version_property_name: "v2", + self._container_rid_property_name: self._container_rid, + self._continuation_property_name: [childToken.to_dict() for childToken in self._continuation], + } + json_data.update(self._feed_range.to_dict()) + return json_data + + @classmethod + def from_json(cls, data) -> 'FeedRangeCompositeContinuation': + version = data.get(cls._version_property_name) + if version is None: + raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._version_property_name}]") + if version != "v2": + raise ValueError("Invalid feed range composite continuation token [Invalid version]") + + container_rid = 
data.get(cls._container_rid_property_name) + if container_rid is None: + raise ValueError(f"Invalid feed range composite continuation token " + f"[Missing {cls._container_rid_property_name}]") + + continuation_data = data.get(cls._continuation_property_name) + if continuation_data is None: + raise ValueError(f"Invalid feed range composite continuation token " + f"[Missing {cls._continuation_property_name}]") + if not isinstance(continuation_data, list) or len(continuation_data) == 0: + raise ValueError(f"Invalid feed range composite continuation token " + f"[The {cls._continuation_property_name} must be non-empty array]") + continuation = [CompositeContinuationToken.from_json(child_range_continuation_token) + for child_range_continuation_token in continuation_data] + + # parsing feed range + feed_range: Optional[FeedRangeInternal] = None + if data.get(FeedRangeInternalEpk.type_property_name): + feed_range = FeedRangeInternalEpk.from_json(data) + elif data.get(FeedRangeInternalPartitionKey.type_property_name): + feed_range = FeedRangeInternalPartitionKey.from_json(data, continuation[0].feed_range) + else: + raise ValueError("Invalid feed range composite continuation token [Missing feed range scope]") + + return cls(container_rid=container_rid, feed_range=feed_range, continuation=deque(continuation)) + + def handle_feed_range_gone( + self, + routing_provider: SmartRoutingMapProvider, + collection_link: str) -> None: + overlapping_ranges = routing_provider.get_overlapping_ranges(collection_link, [self._current_token.feed_range]) + + if len(overlapping_ranges) == 1: + # merge,reusing the existing the feedRange and continuationToken + pass + else: + # split, remove the parent range and then add new child ranges. 
+ # For each new child range, using the continuation token from the parent + self._continuation.popleft() + for child_range in overlapping_ranges: + self._continuation.append( + CompositeContinuationToken( + Range.PartitionKeyRangeToRange(child_range), + self._current_token.token)) + + self._current_token = self._continuation[0] + + async def handle_feed_range_gone_async( + self, + routing_provider: AsyncSmartRoutingMapProvider, + collection_link: str) -> None: + overlapping_ranges = \ + await routing_provider.get_overlapping_ranges( + collection_link, + [self._current_token.feed_range]) + + if len(overlapping_ranges) == 1: + # merge,reusing the existing the feedRange and continuationToken + pass + else: + # split, remove the parent range and then add new child ranges. + # For each new child range, using the continuation token from the parent + self._continuation.popleft() + for child_range in overlapping_ranges: + self._continuation.append( + CompositeContinuationToken( + Range.PartitionKeyRangeToRange(child_range), + self._current_token.token)) + + self._current_token = self._continuation[0] + + def should_retry_on_not_modified_response(self) -> bool: + # when getting 304(Not Modified) response from one sub feed range, + # we will try to fetch for the next sub feed range + # we will repeat the above logic until we have looped through all sub feed ranges + + # TODO: validate the response headers, can we get the status code + if len(self._continuation) > 1: + return self._current_token.feed_range != self._initial_no_result_range + + return False + + def _move_to_next_token(self) -> None: + first_composition_token = self._continuation.popleft() + # add the composition token to the end of the list + self._continuation.append(first_composition_token) + self._current_token = self._continuation[0] + + def apply_server_response_continuation(self, etag, has_modified_response: bool) -> None: + self._current_token.update_token(etag) + if has_modified_response: + 
self._initial_no_result_range = None + else: + self.apply_not_modified_response() + + def apply_not_modified_response(self) -> None: + if self._initial_no_result_range is None: + self._initial_no_result_range = self._current_token.feed_range + + @property + def feed_range(self) -> FeedRangeInternal: + return self._feed_range diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_internal.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_internal.py new file mode 100644 index 000000000000..c04fda0952f9 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_internal.py @@ -0,0 +1,132 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for feed range implementation in the Azure Cosmos +database service. 
+""" +import base64 +import json +from abc import ABC, abstractmethod +from typing import Union, List, Dict, Any, Optional + +from azure.cosmos._routing.routing_range import Range +from azure.cosmos.partition_key import _Undefined, _Empty + + +class FeedRangeInternal(ABC): + + @abstractmethod + def get_normalized_range(self) -> Range: + pass + + @abstractmethod + def to_dict(self) -> Dict[str, Any]: + pass + + def _to_base64_encoded_string(self) -> str: + data_json = json.dumps(self.to_dict()) + json_bytes = data_json.encode('utf-8') + # Encode the bytes to a Base64 string + base64_bytes = base64.b64encode(json_bytes) + # Convert the Base64 bytes to a string + return base64_bytes.decode('utf-8') + +class FeedRangeInternalPartitionKey(FeedRangeInternal): + type_property_name = "PK" + + def __init__( + self, + pk_value: Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined], + feed_range: Range) -> None: # pylint: disable=line-too-long + + if pk_value is None: + raise ValueError("PartitionKey cannot be None") + if feed_range is None: + raise ValueError("Feed range cannot be None") + + self._pk_value = pk_value + self._feed_range = feed_range + + def get_normalized_range(self) -> Range: + return self._feed_range.to_normalized_range() + + def to_dict(self) -> Dict[str, Any]: + if isinstance(self._pk_value, _Undefined): + return { self.type_property_name: [{}] } + if isinstance(self._pk_value, _Empty): + return { self.type_property_name: [] } + if isinstance(self._pk_value, list): + return { self.type_property_name: list(self._pk_value) } + + return { self.type_property_name: self._pk_value } + + @classmethod + def from_json(cls, data: Dict[str, Any], feed_range: Range) -> 'FeedRangeInternalPartitionKey': + if data.get(cls.type_property_name): + pk_value = data.get(cls.type_property_name) + if not pk_value: + return cls(_Empty(), feed_range) + if pk_value == [{}]: + return cls(_Undefined(), feed_range) + if isinstance(pk_value, list): + 
return cls(list(pk_value), feed_range) + return cls(data[cls.type_property_name], feed_range) + + raise ValueError(f"Can not parse FeedRangeInternalPartitionKey from the json," + f" there is no property {cls.type_property_name}") + + +class FeedRangeInternalEpk(FeedRangeInternal): + type_property_name = "Range" + + def __init__(self, feed_range: Range) -> None: + if feed_range is None: + raise ValueError("feed_range cannot be None") + + self._range = feed_range + self._base64_encoded_string: Optional[str] = None + + def get_normalized_range(self) -> Range: + return self._range.to_normalized_range() + + def to_dict(self) -> Dict[str, Any]: + return { + self.type_property_name: self._range.to_dict() + } + + @classmethod + def from_json(cls, data: Dict[str, Any]) -> 'FeedRangeInternalEpk': + if data.get(cls.type_property_name): + feed_range = Range.ParseFromDict(data.get(cls.type_property_name)) + return cls(feed_range) + raise ValueError(f"Can not parse FeedRangeInternalEPK from the json," + f" there is no property {cls.type_property_name}") + + def __str__(self) -> str: + """Get a json representation of the feed range. + The returned json string can be used to create a new feed range from it. + + :return: A json representation of the feed range. 
+ """ + if self._base64_encoded_string is None: + self._base64_encoded_string = self._to_base64_encoded_string() + + return self._base64_encoded_string diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py index 1288e7a4e66e..aa0241d7f289 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py @@ -27,13 +27,11 @@ import urllib.parse from typing import Callable, Dict, Any, Iterable, List, Mapping, Optional, Sequence, Tuple, Union, cast, Type from typing_extensions import TypedDict - from urllib3.util.retry import Retry + +from azure.core import PipelineClient from azure.core.credentials import TokenCredential from azure.core.paging import ItemPaged -from azure.core import PipelineClient -from azure.core.pipeline.transport import HttpRequest, \ - HttpResponse # pylint: disable=no-legacy-azure-core-http-response-import from azure.core.pipeline.policies import ( HTTPPolicy, ContentDecodePolicy, @@ -44,22 +42,29 @@ DistributedTracingPolicy, ProxyPolicy ) +from azure.core.pipeline.transport import HttpRequest, \ + HttpResponse # pylint: disable=no-legacy-azure-core-http-response-import from . import _base as base -from ._base import _set_properties_cache -from . import documents -from .documents import ConnectionPolicy, DatabaseAccount -from ._constants import _Constants as Constants -from . import http_constants, exceptions +from . import _global_endpoint_manager as global_endpoint_manager from . import _query_iterable as query_iterable from . import _runtime_constants as runtime_constants -from ._request_object import RequestObject -from . import _synchronized_request as synchronized_request -from . import _global_endpoint_manager as global_endpoint_manager -from ._routing import routing_map_provider, routing_range -from ._retry_utility import ConnectionRetryPolicy from . 
import _session +from . import _synchronized_request as synchronized_request from . import _utils +from . import documents +from . import http_constants, exceptions +from ._auth_policy import CosmosBearerTokenCredentialPolicy +from ._base import _set_properties_cache +from ._change_feed.change_feed_iterable import ChangeFeedIterable +from ._change_feed.change_feed_state import ChangeFeedState +from ._constants import _Constants as Constants +from ._cosmos_http_logging_policy import CosmosHttpLoggingPolicy +from ._range_partition_resolver import RangePartitionResolver +from ._request_object import RequestObject +from ._retry_utility import ConnectionRetryPolicy +from ._routing import routing_map_provider, routing_range +from .documents import ConnectionPolicy, DatabaseAccount from .partition_key import ( _Undefined, _Empty, @@ -67,9 +72,6 @@ _return_undefined_or_empty_partition_key, NonePartitionKeyValue ) -from ._auth_policy import CosmosBearerTokenCredentialPolicy -from ._cosmos_http_logging_policy import CosmosHttpLoggingPolicy -from ._range_partition_resolver import RangePartitionResolver PartitionKeyType = Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]] # pylint: disable=line-too-long @@ -1160,7 +1162,6 @@ def _QueryChangeFeed( options = {} else: options = dict(options) - options["changeFeed"] = True resource_key_map = {"Documents": "docs"} @@ -1191,11 +1192,10 @@ def fetch_fn(options: Mapping[str, Any]) -> Tuple[List[Dict[str, Any]], Dict[str return ItemPaged( self, - None, options, fetch_function=fetch_fn, collection_link=collection_link, - page_iterator_class=query_iterable.QueryIterable + page_iterator_class=ChangeFeedIterable ) def _ReadPartitionKeyRanges( @@ -3023,6 +3023,11 @@ def __GetBodiesFromQueryResult(result: Dict[str, Any]) -> List[Dict[str, Any]]: options, partition_key_range_id ) + + change_feed_state: Optional[ChangeFeedState] = options.get("changeFeedState") + if change_feed_state is not 
None: + change_feed_state.populate_request_headers(self._routing_map_provider, headers) + result, last_response_headers = self.__Get(path, request_params, headers, **kwargs) self.last_response_headers = last_response_headers if response_hook: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/base_execution_context.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/base_execution_context.py index 4ccef73388de..560ca6c05389 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/base_execution_context.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/base_execution_context.py @@ -44,7 +44,6 @@ def __init__(self, client, options): """ self._client = client self._options = options - self._is_change_feed = "changeFeed" in options and options["changeFeed"] is True self._continuation = self._get_initial_continuation() self._has_started = False self._has_finished = False @@ -117,10 +116,6 @@ async def _fetch_items_helper_no_retries(self, fetch_function): fetched_items = [] new_options = copy.deepcopy(self._options) while self._continuation or not self._has_started: - # Check if this is first fetch for read from specific time change feed. - # For read specific time the first fetch will return empty even if we have more pages. - is_s_time_first_fetch = self._is_change_feed and self._options.get("startTime") and not self._has_started - new_options["continuation"] = self._continuation response_headers = {} @@ -129,16 +124,8 @@ async def _fetch_items_helper_no_retries(self, fetch_function): self._has_started = True continuation_key = http_constants.HttpHeaders.Continuation - # Use Etag as continuation token for change feed queries. - if self._is_change_feed: - continuation_key = http_constants.HttpHeaders.ETag - # In change feed queries, the continuation token is always populated. The hasNext() test is whether - # there is any items in the response or not. 
- # No initial fetch for start time change feed, so we need to pass continuation token for first fetch - if not self._is_change_feed or fetched_items or is_s_time_first_fetch: - self._continuation = response_headers.get(continuation_key) - else: - self._continuation = None + self._continuation = response_headers.get(continuation_key) + if fetched_items: break return fetched_items diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/base_execution_context.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/base_execution_context.py index b7ef17898656..23ba3d170994 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/base_execution_context.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/base_execution_context.py @@ -42,7 +42,6 @@ def __init__(self, client, options): """ self._client = client self._options = options - self._is_change_feed = "changeFeed" in options and options["changeFeed"] is True self._continuation = self._get_initial_continuation() self._has_started = False self._has_finished = False @@ -115,9 +114,6 @@ def _fetch_items_helper_no_retries(self, fetch_function): fetched_items = [] new_options = copy.deepcopy(self._options) while self._continuation or not self._has_started: - # Check if this is first fetch for read from specific time change feed. - # For read specific time the first fetch will return empty even if we have more pages. - is_s_time_first_fetch = self._is_change_feed and self._options.get("startTime") and not self._has_started if not self._has_started: self._has_started = True new_options["continuation"] = self._continuation @@ -126,16 +122,8 @@ def _fetch_items_helper_no_retries(self, fetch_function): (fetched_items, response_headers) = fetch_function(new_options) continuation_key = http_constants.HttpHeaders.Continuation - # Use Etag as continuation token for change feed queries. 
- if self._is_change_feed: - continuation_key = http_constants.HttpHeaders.ETag - # In change feed queries, the continuation token is always populated. The hasNext() test is whether - # there is any items in the response or not. - # For start time however we get no initial results, so we need to pass continuation token - if not self._is_change_feed or fetched_items or is_s_time_first_fetch: - self._continuation = response_headers.get(continuation_key) - else: - self._continuation = None + self._continuation = response_headers.get(continuation_key) + if fetched_items: break return fetched_items diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py new file mode 100644 index 000000000000..2bda669b6bc0 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py @@ -0,0 +1,70 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import base64 +import json +from abc import ABC +from typing import Any, Dict + +from azure.cosmos._change_feed.feed_range_internal import FeedRangeInternalEpk +from azure.cosmos._routing.routing_range import Range + +# pylint: disable=protected-access +class FeedRange(ABC): + """Represents a single feed range in an Azure Cosmos DB SQL API container. + + """ + @staticmethod + def from_string(json_str: str) -> 'FeedRange': + """ + Create a feed range from previously obtained string representation. + + :param str json_str: A string representation of a feed range. + :return: A feed range. + :rtype: ~azure.cosmos.FeedRange + """ + feed_range_json_str = base64.b64decode(json_str).decode('utf-8') + feed_range_json = json.loads(feed_range_json_str) + if feed_range_json.get(FeedRangeEpk.type_property_name): + return FeedRangeEpk._from_json(feed_range_json) + + raise ValueError("Invalid feed range base64 encoded string [Wrong feed range type]") + +class FeedRangeEpk(FeedRange): + type_property_name = "Range" + + def __init__(self, feed_range: Range) -> None: + if feed_range is None: + raise ValueError("feed_range cannot be None") + + self._feed_range_internal = FeedRangeInternalEpk(feed_range) + + def __str__(self) -> str: + """Get a json representation of the feed range. + The returned json string can be used to create a new feed range from it. + + :return: A json representation of the feed range. 
+ """ + return self._feed_range_internal.__str__() + + @classmethod + def _from_json(cls, data: Dict[str, Any]) -> 'FeedRange': + return cls(FeedRangeInternalEpk.from_json(data)._range) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py index ebf1ee82b005..e70ae355c495 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py @@ -49,7 +49,7 @@ def __init__(self, client): # keeps the cached collection routing map by collection id self._collection_routing_map_by_item = {} - async def get_overlapping_ranges(self, collection_link, partition_key_ranges): + async def get_overlapping_ranges(self, collection_link, partition_key_ranges, **kwargs): """Given a partition key range and a collection, return the list of overlapping partition key ranges. @@ -64,7 +64,7 @@ async def get_overlapping_ranges(self, collection_link, partition_key_ranges): collection_routing_map = self._collection_routing_map_by_item.get(collection_id) if collection_routing_map is None: - collection_pk_ranges = [pk async for pk in cl._ReadPartitionKeyRanges(collection_link)] + collection_pk_ranges = [pk async for pk in cl._ReadPartitionKeyRanges(collection_link, **kwargs)] # for large collections, a split may complete between the read partition key ranges query page responses, # causing the partitionKeyRanges to have both the children ranges and their parents. Therefore, we need # to discard the parent ranges to have a valid routing map. 
@@ -131,7 +131,7 @@ class SmartRoutingMapProvider(PartitionKeyRangeCache): invocation of CollectionRoutingMap.get_overlapping_ranges() """ - async def get_overlapping_ranges(self, collection_link, partition_key_ranges): + async def get_overlapping_ranges(self, collection_link, partition_key_ranges, **kwargs): """ Given the sorted ranges and a collection, Returns the list of overlapping partition key ranges @@ -165,8 +165,12 @@ async def get_overlapping_ranges(self, collection_link, partition_key_ranges): else: queryRange = currentProvidedRange - overlappingRanges = await PartitionKeyRangeCache.get_overlapping_ranges(self, - collection_link, queryRange) + overlappingRanges =\ + await PartitionKeyRangeCache.get_overlapping_ranges( + self, + collection_link, + [queryRange], + **kwargs) assert overlappingRanges, "code bug: returned overlapping ranges for queryRange {} is empty".format( queryRange ) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py index 59c609dec7ea..8dacb5190e07 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py @@ -50,7 +50,7 @@ def __init__(self, client): # keeps the cached collection routing map by collection id self._collection_routing_map_by_item = {} - def get_overlapping_ranges(self, collection_link, partition_key_ranges): + def get_overlapping_ranges(self, collection_link, partition_key_ranges, **kwargs): """Given a partition key range and a collection, return the list of overlapping partition key ranges. 
@@ -65,7 +65,7 @@ def get_overlapping_ranges(self, collection_link, partition_key_ranges): collection_routing_map = self._collection_routing_map_by_item.get(collection_id) if collection_routing_map is None: - collection_pk_ranges = list(cl._ReadPartitionKeyRanges(collection_link)) + collection_pk_ranges = list(cl._ReadPartitionKeyRanges(collection_link, **kwargs)) # for large collections, a split may complete between the read partition key ranges query page responses, # causing the partitionKeyRanges to have both the children ranges and their parents. Therefore, we need # to discard the parent ranges to have a valid routing map. @@ -132,7 +132,7 @@ class SmartRoutingMapProvider(PartitionKeyRangeCache): invocation of CollectionRoutingMap.get_overlapping_ranges() """ - def get_overlapping_ranges(self, collection_link, partition_key_ranges): + def get_overlapping_ranges(self, collection_link, partition_key_ranges, **kwargs): """ Given the sorted ranges and a collection, Returns the list of overlapping partition key ranges @@ -166,7 +166,8 @@ def get_overlapping_ranges(self, collection_link, partition_key_ranges): else: queryRange = currentProvidedRange - overlappingRanges = PartitionKeyRangeCache.get_overlapping_ranges(self, collection_link, queryRange) + overlappingRanges = ( + PartitionKeyRangeCache.get_overlapping_ranges(self, collection_link, [queryRange], **kwargs)) assert overlappingRanges, "code bug: returned overlapping ranges for queryRange {} is empty".format( queryRange ) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py index 0d61fbbbe1d7..a2d789f20644 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py @@ -22,6 +22,9 @@ """Internal class for partition key range implementation in the Azure Cosmos database service. 
""" +import base64 +import binascii +import json class PartitionKeyRange(object): @@ -81,6 +84,74 @@ def ParseFromDict(cls, range_as_dict): ) return self + def to_dict(self): + return { + self.MinPath: self.min, + self.MaxPath: self.max, + self.IsMinInclusivePath: self.isMinInclusive, + self.IsMaxInclusivePath: self.isMaxInclusive + } + + def to_normalized_range(self): + if self.isMinInclusive and not self.isMaxInclusive: + return self + + normalized_min = self.min + normalized_max = self.max + + if not self.isMinInclusive: + normalized_min = self.add_to_effective_partition_key(self.min, -1) + + if self.isMaxInclusive: + normalized_max = self.add_to_effective_partition_key(self.max, 1) + + return Range(normalized_min, normalized_max, True, False) + + def add_to_effective_partition_key(self, effective_partition_key: str, value: int): + if value not in (-1, 1): + raise ValueError("Invalid value - only 1 or -1 is allowed") + + byte_array = self.hex_binary_to_byte_array(effective_partition_key) + if value == 1: + for i in range(len(byte_array) -1, -1, -1): + if byte_array[i] < 255: + byte_array[i] += 1 + break + byte_array[i] = 0 + else: + for i in range(len(byte_array) - 1, -1, -1): + if byte_array[i] != 0: + byte_array[i] -= 1 + break + byte_array[i] = 255 + + return binascii.hexlify(byte_array).decode() + + def hex_binary_to_byte_array(self, hex_binary_string: str): + if hex_binary_string is None: + raise ValueError("hex_binary_string is missing") + if len(hex_binary_string) % 2 != 0: + raise ValueError("hex_binary_string must not have an odd number of characters") + + return bytearray.fromhex(hex_binary_string) + + @classmethod + def from_base64_encoded_json_string(cls, data: str): + try: + feed_range_json_string = base64.b64decode(data, validate=True).decode('utf-8') + feed_range_json = json.loads(feed_range_json_string) + return cls.ParseFromDict(feed_range_json) + except Exception as exc: + raise ValueError(f"Invalid feed_range json string {data}") from exc + + 
def to_base64_encoded_string(self): + data_json = json.dumps(self.to_dict()) + json_bytes = data_json.encode('utf-8') + # Encode the bytes to a Base64 string + base64_bytes = base64.b64encode(json_bytes) + # Convert the Base64 bytes to a string + return base64_bytes.decode('utf-8') + def isSingleValue(self): return self.isMinInclusive and self.isMaxInclusive and self.min == self.max diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index c58e9cbf5351..3f879ded3187 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -21,8 +21,9 @@ """Create, read, update and delete items in the Azure Cosmos DB SQL API service. """ -from datetime import datetime, timezone -from typing import Any, Dict, Mapping, Optional, Sequence, Type, Union, List, Tuple, cast +import warnings +from datetime import datetime +from typing import Any, Dict, Mapping, Optional, Sequence, Type, Union, List, Tuple, cast, overload from typing_extensions import Literal from azure.core import MatchConditions @@ -31,6 +32,7 @@ from azure.core.tracing.decorator_async import distributed_trace_async # type: ignore from ._cosmos_client_connection_async import CosmosClientConnection +from ._scripts import ScriptsProxy from .._base import ( build_options as _build_options, validate_cache_staleness_value, @@ -39,19 +41,21 @@ GenerateGuidId, _set_properties_cache ) +from .._feed_range import FeedRange, FeedRangeEpk +from .._routing.routing_range import Range from ..offer import ThroughputProperties -from ._scripts import ScriptsProxy from ..partition_key import ( NonePartitionKeyValue, _return_undefined_or_empty_partition_key, _Empty, - _Undefined + _Undefined, PartitionKey ) __all__ = ("ContainerProxy",) # pylint: disable=protected-access, too-many-lines # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs +# 
pylint: disable=too-many-public-methods PartitionKeyType = Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]] # pylint: disable=line-too-long @@ -132,6 +136,14 @@ async def _set_partition_key( return _return_undefined_or_empty_partition_key(await self.is_system_key) return cast(Union[str, int, float, bool, List[Union[str, int, float, bool]]], partition_key) + async def _get_epk_range_for_partition_key(self, partition_key_value: PartitionKeyType) -> Range: + + container_properties = await self._get_properties() + partition_key_definition = container_properties["partitionKey"] + partition_key = PartitionKey(path=partition_key_definition["paths"], kind=partition_key_definition["kind"]) + + return partition_key._get_epk_range_for_partition_key(partition_key_value) + @distributed_trace_async async def read( self, @@ -486,62 +498,216 @@ def query_items( response_hook(self.client_connection.last_response_headers, items) return items - @distributed_trace + @overload def query_items_change_feed( - self, - *, - partition_key_range_id: Optional[str] = None, - is_start_from_beginning: bool = False, - start_time: Optional[datetime] = None, - continuation: Optional[str] = None, - max_item_count: Optional[int] = None, - partition_key: Optional[PartitionKeyType] = None, - priority: Optional[Literal["High", "Low"]] = None, - **kwargs: Any + self, + *, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + partition_key: PartitionKeyType, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any ) -> AsyncItemPaged[Dict[str, Any]]: """Get a sorted list of items that were changed, in the order in which they were modified. - :keyword bool is_start_from_beginning: Get whether change feed should start from - beginning (true) or from current (false). By default, it's start from current (false). 
- :keyword ~datetime.datetime start_time: Specifies a point of time to start change feed. Provided value will be - converted to UTC. This value will be ignored if `is_start_from_beginning` is set to true. - :keyword str partition_key_range_id: ChangeFeed requests can be executed against specific partition key - ranges. This is used to process the change feed in parallel across multiple consumers. - :keyword str continuation: e_tag value to be used as continuation for reading change feed. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. - :keyword partition_key: partition key at which ChangeFeed requests are targeted. - :paramtype partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] - :keyword response_hook: A callable invoked with the response metadata. - :paramtype response_hook: Callable[[Dict[str, str], AsyncItemPaged[Dict[str, Any]]], None] + :keyword start_time: The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword partition_key: The partition key that is used to define the scope + (logical partition or a subset of a container) + :type partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. 
+ :keyword Callable response_hook: A callable invoked with the response metadata. :returns: An AsyncItemPaged of items (dicts). :rtype: AsyncItemPaged[Dict[str, Any]] """ - response_hook = kwargs.pop('response_hook', None) - if priority is not None: - kwargs['priority'] = priority + ... + + @overload + def query_items_change_feed( + self, + *, + feed_range: FeedRange, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> AsyncItemPaged[Dict[str, Any]]: + """Get a sorted list of items that were changed, in the order in which they were modified. + + :keyword feed_range: The feed range that is used to define the scope. + :type feed_range: ~azure.cosmos.FeedRange + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword start_time: The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. + :returns: An AsyncItemPaged of items (dicts). + :rtype: AsyncItemPaged[Dict[str, Any]] + """ + ... 
+ + @overload + def query_items_change_feed( + self, + *, + continuation: str, + max_item_count: Optional[int] = None, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> AsyncItemPaged[Dict[str, Any]]: + """Get a sorted list of items that were changed, in the order in which they were modified. + + :keyword str continuation: The continuation token retrieved from previous response. + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. + :returns: An AsyncItemPaged of items (dicts). + :rtype: AsyncItemPaged[Dict[str, Any]] + """ + # pylint: enable=line-too-long + ... + + @overload + def query_items_change_feed( + self, + *, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> AsyncItemPaged[Dict[str, Any]]: + """Get a sorted list of items that were changed in the entire container, + in the order in which they were modified. + + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword start_time: The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. 
+ By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. + :returns: An AsyncItemPaged of items (dicts). + :rtype: AsyncItemPaged[Dict[str, Any]] + """ + ... + + @distributed_trace + def query_items_change_feed( # pylint: disable=unused-argument + self, + *args: Any, + **kwargs: Any + ) -> AsyncItemPaged[Dict[str, Any]]: + + """Get a sorted list of items that were changed, in the order in which they were modified. + + :keyword str continuation: The continuation token retrieved from previous response. + :keyword feed_range: The feed range that is used to define the scope. + :type feed_range: ~azure.cosmos.FeedRange + :keyword partition_key: The partition key that is used to define the scope + (logical partition or a subset of a container) + :type partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword start_time: The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. 
+ By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. + :param Any args: args + :returns: An Iterable of items (dicts). + :rtype: Iterable[Dict[str, Any]] + """ + # pylint: disable=too-many-statements + if kwargs.get("priority") is not None: + kwargs['priority'] = kwargs['priority'] feed_options = _build_options(kwargs) - feed_options["isStartFromBeginning"] = is_start_from_beginning - if start_time is not None and is_start_from_beginning is False: - feed_options["startTime"] = start_time.astimezone(timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT') - if partition_key_range_id is not None: - feed_options["partitionKeyRangeId"] = partition_key_range_id - if partition_key is not None: - feed_options["partitionKey"] = self._set_partition_key(partition_key) - if max_item_count is not None: - feed_options["maxItemCount"] = max_item_count - if continuation is not None: - feed_options["continuation"] = continuation + + change_feed_state_context = {} + # Back compatibility with deprecation warnings for partition_key_range_id + if kwargs.get("partition_key_range_id") is not None: + warnings.warn( + "partition_key_range_id is deprecated. Please pass in feed_range instead.", + DeprecationWarning + ) + + change_feed_state_context["partitionKeyRangeId"] = kwargs.pop('partition_key_range_id') + + # Back compatibility with deprecation warnings for is_start_from_beginning + if kwargs.get("is_start_from_beginning") is not None: + warnings.warn( + "is_start_from_beginning is deprecated. 
Please pass in start_time instead.", + DeprecationWarning + ) + + if kwargs.get("start_time") is not None: + raise ValueError("is_start_from_beginning and start_time are exclusive, please only set one of them") + + is_start_from_beginning = kwargs.pop('is_start_from_beginning') + if is_start_from_beginning is True: + change_feed_state_context["startTime"] = "Beginning" + + # parse start_time + if kwargs.get("start_time") is not None: + start_time = kwargs.pop('start_time') + if not isinstance(start_time, (datetime, str)): + raise TypeError( + "'start_time' must be either a datetime object, or either the values 'Now' or 'Beginning'.") + change_feed_state_context["startTime"] = start_time + + # parse continuation token + if feed_options.get("continuation") is not None: + change_feed_state_context["continuation"] = feed_options.pop('continuation') + + if kwargs.get("max_item_count") is not None: + feed_options["maxItemCount"] = kwargs.pop('max_item_count') + + if kwargs.get("partition_key") is not None: + change_feed_state_context["partitionKey"] =\ + self._set_partition_key(cast(PartitionKeyType, kwargs.get("partition_key"))) + change_feed_state_context["partitionKeyFeedRange"] = \ + self._get_epk_range_for_partition_key(kwargs.pop('partition_key')) + + if kwargs.get("feed_range") is not None: + feed_range: FeedRangeEpk = kwargs.pop('feed_range') + change_feed_state_context["feedRange"] = feed_range._feed_range_internal + + feed_options["containerProperties"] = self._get_properties() + feed_options["changeFeedStateContext"] = change_feed_state_context + + response_hook = kwargs.pop('response_hook', None) if hasattr(response_hook, "clear"): response_hook.clear() + if self.container_link in self.__get_client_container_caches(): feed_options["containerRID"] = self.__get_client_container_caches()[self.container_link]["_rid"] result = self.client_connection.QueryItemsChangeFeed( self.container_link, options=feed_options, response_hook=response_hook, **kwargs ) + if 
response_hook: response_hook(self.client_connection.last_response_headers, result) return result @@ -1126,3 +1292,30 @@ async def execute_item_batch( return await self.client_connection.Batch( collection_link=self.container_link, batch_operations=batch_operations, options=request_options, **kwargs) + + async def read_feed_ranges( + self, + *, + force_refresh: Optional[bool] = False, + **kwargs: Any + ) -> List[FeedRange]: + """ Obtains a list of feed ranges that can be used to parallelize feed operations. + + :keyword bool force_refresh: + Flag to indicate whether obtain the list of feed ranges directly from cache or refresh the cache. + :returns: A list representing the feed ranges in base64 encoded string + :rtype: List[str] + + """ + if force_refresh is True: + self.client_connection.refresh_routing_map_provider() + + partition_key_ranges =\ + await self.client_connection._routing_map_provider.get_overlapping_ranges( + self.container_link, + # default to full range + [Range("", "FF", True, False)], + **kwargs) + + return [FeedRangeEpk(Range.PartitionKeyRangeToRange(partitionKeyRange)) + for partitionKeyRange in partition_key_ranges] diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py index 72ea03668909..eeb67225660a 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py @@ -50,6 +50,8 @@ from .. import _base as base from .._base import _set_properties_cache from .. 
import documents +from .._change_feed.aio.change_feed_iterable import ChangeFeedIterable +from .._change_feed.change_feed_state import ChangeFeedState from .._routing import routing_range from ..documents import ConnectionPolicy, DatabaseAccount from .._constants import _Constants as Constants @@ -2275,7 +2277,6 @@ def _QueryChangeFeed( options = {} else: options = dict(options) - options["changeFeed"] = True resource_key_map = {"Documents": "docs"} @@ -2310,11 +2311,10 @@ async def fetch_fn(options: Mapping[str, Any]) -> Tuple[List[Dict[str, Any]], Di return AsyncItemPaged( self, - None, options, fetch_function=fetch_fn, collection_link=collection_link, - page_iterator_class=query_iterable.QueryIterable + page_iterator_class=ChangeFeedIterable ) def QueryOffers( @@ -2812,6 +2812,11 @@ def __GetBodiesFromQueryResult(result: Dict[str, Any]) -> List[Dict[str, Any]]: documents._OperationType.QueryPlan if is_query_plan else documents._OperationType.ReadFeed ) headers = base.GetHeaders(self, initial_headers, "get", path, id_, typ, options, partition_key_range_id) + + change_feed_state: Optional[ChangeFeedState] = options.get("changeFeedState") + if change_feed_state is not None: + await change_feed_state.populate_request_headers_async(self._routing_map_provider, headers) + result, self.last_response_headers = await self.__Get(path, request_params, headers, **kwargs) if response_hook: response_hook(self.last_response_headers, result) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index 5100eb6fe57f..e602aca419b9 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -21,16 +21,15 @@ """Create, read, update and delete items in the Azure Cosmos DB SQL API service. 
""" -from datetime import datetime, timezone import warnings -from typing import Any, Dict, List, Optional, Sequence, Union, Tuple, Mapping, Type, cast +from datetime import datetime +from typing import Any, Dict, List, Optional, Sequence, Union, Tuple, Mapping, Type, cast, overload from typing_extensions import Literal from azure.core import MatchConditions -from azure.core.tracing.decorator import distributed_trace from azure.core.paging import ItemPaged +from azure.core.tracing.decorator import distributed_trace -from ._cosmos_client_connection import CosmosClientConnection from ._base import ( build_options, validate_cache_staleness_value, @@ -39,8 +38,10 @@ GenerateGuidId, _set_properties_cache ) +from ._cosmos_client_connection import CosmosClientConnection +from ._feed_range import FeedRange, FeedRangeEpk +from ._routing.routing_range import Range from .offer import Offer, ThroughputProperties -from .scripts import ScriptsProxy from .partition_key import ( NonePartitionKeyValue, PartitionKey, @@ -48,6 +49,7 @@ _Undefined, _return_undefined_or_empty_partition_key ) +from .scripts import ScriptsProxy __all__ = ("ContainerProxy",) @@ -129,6 +131,13 @@ def _set_partition_key( return _return_undefined_or_empty_partition_key(self.is_system_key) return cast(Union[str, int, float, bool, List[Union[str, int, float, bool]]], partition_key) + def _get_epk_range_for_partition_key( self, partition_key_value: PartitionKeyType) -> Range: + container_properties = self._get_properties() + partition_key_definition = container_properties["partitionKey"] + partition_key = PartitionKey(path=partition_key_definition["paths"], kind=partition_key_definition["kind"]) + + return partition_key._get_epk_range_for_partition_key(partition_key_value) + def __get_client_container_caches(self) -> Dict[str, Dict[str, Any]]: return self.client_connection._container_properties_cache @@ -309,60 +318,226 @@ def read_all_items( # pylint:disable=docstring-missing-param 
response_hook(self.client_connection.last_response_headers, items) return items - @distributed_trace + @overload def query_items_change_feed( - self, - partition_key_range_id: Optional[str] = None, - is_start_from_beginning: bool = False, - continuation: Optional[str] = None, - max_item_count: Optional[int] = None, - *, - start_time: Optional[datetime] = None, - partition_key: Optional[PartitionKeyType] = None, - priority: Optional[Literal["High", "Low"]] = None, - **kwargs: Any + self, + *, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + partition_key: PartitionKeyType, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any ) -> ItemPaged[Dict[str, Any]]: """Get a sorted list of items that were changed, in the order in which they were modified. - :param str partition_key_range_id: ChangeFeed requests can be executed against specific partition key ranges. - This is used to process the change feed in parallel across multiple consumers. - :param bool is_start_from_beginning: Get whether change feed should start from - beginning (true) or from current (false). By default, it's start from current (false). - :param max_item_count: Max number of items to be returned in the enumeration operation. - :param str continuation: e_tag value to be used as continuation for reading change feed. - :param int max_item_count: Max number of items to be returned in the enumeration operation. - :keyword ~datetime.datetime start_time: Specifies a point of time to start change feed. Provided value will be - converted to UTC. This value will be ignored if `is_start_from_beginning` is set to true. - :keyword partition_key: partition key at which ChangeFeed requests are targeted. - :paramtype partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. 
+ :keyword start_time:The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword partition_key: The partition key that is used to define the scope + (logical partition or a subset of a container) + :type partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. + :returns: An Iterable of items (dicts). + :rtype: Iterable[Dict[str, Any]] + """ + ... + + @overload + def query_items_change_feed( + self, + *, + feed_range: FeedRange, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> ItemPaged[Dict[str, Any]]: + + """Get a sorted list of items that were changed, in the order in which they were modified. + + :keyword feed_range: The feed range that is used to define the scope. + :type feed_range: ~azure.cosmos.FeedRange + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword start_time: The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. 
+ Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. :keyword Callable response_hook: A callable invoked with the response metadata. + :returns: An Iterable of items (dicts). + :rtype: Iterable[Dict[str, Any]] + """ + ... + + @overload + def query_items_change_feed( + self, + *, + continuation: str, + max_item_count: Optional[int] = None, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> ItemPaged[Dict[str, Any]]: + """Get a sorted list of items that were changed, in the order in which they were modified. + + :keyword str continuation: The continuation token retrieved from previous response. + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. :returns: An Iterable of items (dicts). - :rtype: Iterable[dict[str, Any]] + :rtype: Iterable[Dict[str, Any]] """ - if priority is not None: - kwargs['priority'] = priority + ... 
+ + @overload + def query_items_change_feed( + self, + *, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> ItemPaged[Dict[str, Any]]: + """Get a sorted list of items that were changed in the entire container, + in the order in which they were modified, + + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword start_time:The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. + :returns: An Iterable of items (dicts). + :rtype: Iterable[Dict[str, Any]] + """ + ... + + @distributed_trace + def query_items_change_feed( + self, + *args: Any, + **kwargs: Any + ) -> ItemPaged[Dict[str, Any]]: + + """Get a sorted list of items that were changed, in the order in which they were modified. + + :keyword str continuation: The continuation token retrieved from previous response. + :keyword feed_range: The feed range that is used to define the scope. 
+ :type feed_range: ~azure.cosmos.FeedRange + :keyword partition_key: The partition key that is used to define the scope + (logical partition or a subset of a container) + :type partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword start_time: The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. + :param Any args: args + :returns: An Iterable of items (dicts). 
+ :rtype: Iterable[Dict[str, Any]] + """ + + # pylint: disable=too-many-statements + if kwargs.get("priority") is not None: + kwargs['priority'] = kwargs['priority'] feed_options = build_options(kwargs) - response_hook = kwargs.pop('response_hook', None) - if partition_key_range_id is not None: - feed_options["partitionKeyRangeId"] = partition_key_range_id - if partition_key is not None: - feed_options["partitionKey"] = self._set_partition_key(partition_key) - if is_start_from_beginning is not None: - feed_options["isStartFromBeginning"] = is_start_from_beginning - if start_time is not None and is_start_from_beginning is False: - feed_options["startTime"] = start_time.astimezone(timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT') - if max_item_count is not None: - feed_options["maxItemCount"] = max_item_count - if continuation is not None: - feed_options["continuation"] = continuation + change_feed_state_context = {} + # Back compatibility with deprecation warnings for partition_key_range_id + if (args and args[0] is not None) or kwargs.get("partition_key_range_id") is not None: + warnings.warn( + "partition_key_range_id is deprecated. Please pass in feed_range instead.", + DeprecationWarning + ) + + try: + change_feed_state_context["partitionKeyRangeId"] = kwargs.pop('partition_key_range_id') + except KeyError: + change_feed_state_context['partitionKeyRangeId'] = args[0] + + # Back compatibility with deprecation warnings for is_start_from_beginning + if (len(args) >= 2 and args[1] is not None) or kwargs.get("is_start_from_beginning") is not None: + warnings.warn( + "is_start_from_beginning is deprecated. 
Please pass in start_time instead.", + DeprecationWarning + ) + + if kwargs.get("start_time") is not None: + raise ValueError("is_start_from_beginning and start_time are exclusive, please only set one of them") + + try: + is_start_from_beginning = kwargs.pop('is_start_from_beginning') + except KeyError: + is_start_from_beginning = args[1] + + if is_start_from_beginning is True: + change_feed_state_context["startTime"] = "Beginning" + + # parse start_time + if kwargs.get("start_time") is not None: + + start_time = kwargs.pop('start_time') + if not isinstance(start_time, (datetime, str)): + raise TypeError( + "'start_time' must be either a datetime object, or either the values 'Now' or 'Beginning'.") + change_feed_state_context["startTime"] = start_time + + # parse continuation token + if len(args) >= 3 and args[2] is not None or feed_options.get("continuation") is not None: + try: + continuation = feed_options.pop('continuation') + except KeyError: + continuation = args[2] + change_feed_state_context["continuation"] = continuation + + if len(args) >= 4 and args[3] is not None or kwargs.get("max_item_count") is not None: + try: + feed_options["maxItemCount"] = kwargs.pop('max_item_count') + except KeyError: + feed_options["maxItemCount"] = args[3] + + if kwargs.get("partition_key") is not None: + change_feed_state_context["partitionKey"] =\ + self._set_partition_key(cast(PartitionKeyType, kwargs.get('partition_key'))) + change_feed_state_context["partitionKeyFeedRange"] =\ + self._get_epk_range_for_partition_key(kwargs.pop('partition_key')) + + if kwargs.get("feed_range") is not None: + feed_range: FeedRangeEpk = kwargs.pop('feed_range') + change_feed_state_context["feedRange"] = feed_range._feed_range_internal + + container_properties = self._get_properties() + feed_options["changeFeedStateContext"] = change_feed_state_context + feed_options["containerRID"] = container_properties["_rid"] + + response_hook = kwargs.pop('response_hook', None) if hasattr(response_hook, 
"clear"): response_hook.clear() - if self.container_link in self.__get_client_container_caches(): - feed_options["containerRID"] = self.__get_client_container_caches()[self.container_link]["_rid"] + result = self.client_connection.QueryItemsChangeFeed( self.container_link, options=feed_options, response_hook=response_hook, **kwargs ) @@ -461,13 +636,14 @@ def query_items( # pylint:disable=docstring-missing-param if populate_index_metrics is not None: feed_options["populateIndexMetrics"] = populate_index_metrics if partition_key is not None: + partition_key_value = self._set_partition_key(partition_key) if self.__is_prefix_partitionkey(partition_key): kwargs["isPrefixPartitionQuery"] = True properties = self._get_properties() kwargs["partitionKeyDefinition"] = properties["partitionKey"] - kwargs["partitionKeyDefinition"]["partition_key"] = partition_key + kwargs["partitionKeyDefinition"]["partition_key"] = partition_key_value else: - feed_options["partitionKey"] = self._set_partition_key(partition_key) + feed_options["partitionKey"] = partition_key_value if enable_scan_in_query is not None: feed_options["enableScanInQuery"] = enable_scan_in_query if max_integrated_cache_staleness_in_ms: @@ -494,16 +670,11 @@ def query_items( # pylint:disable=docstring-missing-param return items def __is_prefix_partitionkey( - self, partition_key: PartitionKeyType - ) -> bool: + self, partition_key: PartitionKeyType) -> bool: properties = self._get_properties() pk_properties = properties["partitionKey"] partition_key_definition = PartitionKey(path=pk_properties["paths"], kind=pk_properties["kind"]) - if partition_key_definition.kind != "MultiHash": - return False - if isinstance(partition_key, list) and len(partition_key_definition['paths']) == len(partition_key): - return False - return True + return partition_key_definition._is_prefix_partition_key(partition_key) @distributed_trace @@ -1192,3 +1363,29 @@ def delete_all_items_by_partition_key( 
self.client_connection.DeleteAllItemsByPartitionKey( collection_link=self.container_link, options=request_options, **kwargs) + + def read_feed_ranges( + self, + *, + force_refresh: Optional[bool] = False, + **kwargs: Any) -> List[FeedRange]: + + """ Obtains a list of feed ranges that can be used to parallelize feed operations. + + :keyword bool force_refresh: + Flag to indicate whether obtain the list of feed ranges directly from cache or refresh the cache. + :returns: A list representing the feed ranges in base64 encoded string + :rtype: List[str] + + """ + if force_refresh is True: + self.client_connection.refresh_routing_map_provider() + + partition_key_ranges =\ + self.client_connection._routing_map_provider.get_overlapping_ranges( + self.container_link, + [Range("", "FF", True, False)], # default to full range + **kwargs) + + return [FeedRangeEpk(Range.PartitionKeyRangeToRange(partitionKeyRange)) + for partitionKeyRange in partition_key_ranges] diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py b/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py index 5092fd0de7cf..7170a4d1dc39 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py @@ -28,7 +28,7 @@ ResourceNotFoundError ) from . 
import http_constants - +from .http_constants import StatusCodes, SubStatusCodes class CosmosHttpResponseError(HttpResponseError): """An HTTP request to the Azure Cosmos database service has failed.""" @@ -135,7 +135,6 @@ def __init__(self, **kwargs): self.history = None super(CosmosClientTimeoutError, self).__init__(message, **kwargs) - def _partition_range_is_gone(e): if (e.status_code == http_constants.StatusCodes.GONE and e.sub_status == http_constants.SubStatusCodes.PARTITION_KEY_RANGE_GONE): @@ -151,3 +150,7 @@ def _container_recreate_exception(e) -> bool: is_throughput_not_found = e.sub_status == http_constants.SubStatusCodes.THROUGHPUT_OFFER_NOT_FOUND return (is_bad_request and is_collection_rid_mismatch) or (is_not_found and is_throughput_not_found) + + +def _is_partition_split_or_merge(e): + return e.status_code == StatusCodes.GONE and e.status_code == SubStatusCodes.COMPLETING_SPLIT diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py index 22fcb19dae06..7fa093aa15e1 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py @@ -23,7 +23,7 @@ from io import BytesIO import binascii import struct -from typing import IO, Sequence, Type, Union, overload, List +from typing import IO, Sequence, Type, Union, overload, List, cast from typing_extensions import Literal from ._cosmos_integers import _UInt64, _UInt128 @@ -173,6 +173,20 @@ def _get_epk_range_for_prefix_partition_key( max_epk = str(min_epk) + "FF" return _Range(min_epk, max_epk, True, False) + def _get_epk_range_for_partition_key( + self, + pk_value: Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]] # pylint: disable=line-too-long + ) -> _Range: + if self._is_prefix_partition_key(pk_value): + return self._get_epk_range_for_prefix_partition_key( + cast(Sequence[Union[None, bool, int, float, str, 
Type[NonePartitionKeyValue]]], pk_value)) + + # else return point range + effective_partition_key_string =\ + self._get_effective_partition_key_string( + cast(List[Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]], [pk_value])) + return _Range(effective_partition_key_string, effective_partition_key_string, True, True) + def _get_effective_partition_key_for_hash_partitioning(self) -> str: # We shouldn't be supporting V1 return "" @@ -265,6 +279,15 @@ def _get_effective_partition_key_for_multi_hash_partitioning_v2( return ''.join(sb).upper() + def _is_prefix_partition_key( + self, + partition_key: Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]]) -> bool: # pylint: disable=line-too-long + if self.kind!= "MultiHash": + return False + if isinstance(partition_key, list) and len(self['paths']) == len(partition_key): + return False + return True + def _return_undefined_or_empty_partition_key(is_system_key: bool) -> Union[_Empty, _Undefined]: if is_system_key: diff --git a/sdk/cosmos/azure-cosmos/samples/examples.py b/sdk/cosmos/azure-cosmos/samples/examples.py index 8039218b7a09..958d72c064d1 100644 --- a/sdk/cosmos/azure-cosmos/samples/examples.py +++ b/sdk/cosmos/azure-cosmos/samples/examples.py @@ -255,4 +255,23 @@ query='SELECT * FROM products p WHERE p.state = "GA"' ): container.delete_item(item, partition_key=["GA", "Atlanta", 30363]) -# [END delete_items] \ No newline at end of file +# [END delete_items] + +# Get the feed ranges list from container. 
+# [START read_feed_ranges] +container.read_feed_ranges() +# [END read_feed_ranges] + +# Query a sorted list of items that were changed for one feed range +# [START query_items_change_feed] +feed_ranges = container.read_feed_ranges() +for item in container.query_items_change_feed(feed_range=feed_ranges[0]): + print(json.dumps(item, indent=True)) +# [END query_items_change_feed] + +# Query a sorted list of items that were changed for one feed range +# [START query_items_change_feed_from_beginning] +feed_ranges = container.read_feed_ranges() +for item in container.query_items_change_feed(feed_range=feed_ranges[0], start_time="Beginning"): + print(json.dumps(item, indent=True)) +# [END query_items_change_feed_from_beginning] \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos/samples/examples_async.py b/sdk/cosmos/azure-cosmos/samples/examples_async.py index 06cec5fb07a8..33805fc71d7d 100644 --- a/sdk/cosmos/azure-cosmos/samples/examples_async.py +++ b/sdk/cosmos/azure-cosmos/samples/examples_async.py @@ -263,6 +263,29 @@ async def examples_async(): await container.delete_item(item, partition_key=["GA", "Atlanta", 30363]) # [END delete_items] + # Get the feed ranges list from container. + # [START read_feed_ranges] + await container.read_feed_ranges() + # [END read_feed_ranges] + + # Query a sorted list of items that were changed for one feed range. + # The asynchronous client returns asynchronous iterators for its query methods; + # as such, we iterate over it by using an async for loop + # [START query_items_change_feed] + feed_ranges = await container.read_feed_ranges() + async for item in container.query_items_change_feed(feed_range=feed_ranges[0]): + print(json.dumps(item, indent=True)) + # [END query_items_change_feed] + + # Query a sorted list of items that were changed for one feed range from beginning. 
+        # The asynchronous client returns asynchronous iterators for its query methods;
+        # as such, we iterate over it by using an async for loop
+        # [START query_items_change_feed_from_beginning]
+        feed_ranges = await container.read_feed_ranges()
+        async for item in container.query_items_change_feed(feed_range=feed_ranges[0], start_time="Beginning"):
+            print(json.dumps(item, indent=True))
+        # [END query_items_change_feed_from_beginning]
+
         await client.delete_database(database_name)
 
         print("Sample done running!")
diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed.py b/sdk/cosmos/azure-cosmos/test/test_change_feed.py
new file mode 100644
index 000000000000..01e2dc21ddb6
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos/test/test_change_feed.py
@@ -0,0 +1,256 @@
+# The MIT License (MIT)
+# Copyright (c) Microsoft Corporation. All rights reserved.
+
+import unittest
+import uuid
+from datetime import datetime, timedelta, timezone
+from time import sleep
+
+import pytest
+from _pytest.outcomes import fail
+
+import azure.cosmos.cosmos_client as cosmos_client
+import azure.cosmos.exceptions as exceptions
+import test_config
+from azure.cosmos.partition_key import PartitionKey
+
+
+@pytest.fixture(scope="class")
+def setup():
+    config = test_config.TestConfig()
+    if (config.masterKey == '[YOUR_KEY_HERE]' or
+            config.host == '[YOUR_ENDPOINT_HERE]'):
+        raise Exception(
+            "You must specify your Azure Cosmos account values for "
+            "'masterKey' and 'host' at the top of this class to run the "
+            "tests.")
+    test_client = cosmos_client.CosmosClient(config.host, config.masterKey)
+    return {
+        "created_db": test_client.get_database_client(config.TEST_DATABASE_ID)
+    }
+
+@pytest.mark.cosmosEmulator
+@pytest.mark.unittest
+@pytest.mark.usefixtures("setup")
+class TestChangeFeed:
+    """Tests for reading feed ranges and querying the change feed of a container"""
+
+    def test_get_feed_ranges(self, setup):
+        created_collection = setup["created_db"].create_container("get_feed_ranges_" + str(uuid.uuid4()),
+                                                                  PartitionKey(path="/pk"))
+        result = created_collection.read_feed_ranges()
+        assert len(result) == 1
+
+    @pytest.mark.parametrize("change_feed_filter_param", ["partitionKey", "partitionKeyRangeId", "feedRange"])
+    def test_query_change_feed_with_different_filter(self, change_feed_filter_param, setup):
+        created_collection = setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()),
+                                                                  PartitionKey(path="/pk"))
+
+        # Read change feed without passing any options
+        query_iterable = created_collection.query_items_change_feed()
+        iter_list = list(query_iterable)
+        assert len(iter_list) == 0
+
+        if change_feed_filter_param == "partitionKey":
+            filter_param = {"partition_key": "pk"}
+        elif change_feed_filter_param == "partitionKeyRangeId":
+            filter_param = {"partition_key_range_id": "0"}
+        elif change_feed_filter_param == "feedRange":
+            feed_ranges = created_collection.read_feed_ranges()
+            assert len(feed_ranges) == 1
+            filter_param = {"feed_range": feed_ranges[0]}
+        else:
+            filter_param = None
+
+        # Read change feed from current should return an empty list
+        query_iterable = created_collection.query_items_change_feed(**filter_param)
+        iter_list = list(query_iterable)
+        assert len(iter_list) == 0
+        assert 'etag' in created_collection.client_connection.last_response_headers
+        assert created_collection.client_connection.last_response_headers['etag'] !=''
+
+        # Read change feed from beginning should return an empty list
+        query_iterable = created_collection.query_items_change_feed(
+            is_start_from_beginning=True,
+            **filter_param
+        )
+        iter_list = list(query_iterable)
+        assert len(iter_list) == 0
+        assert 'etag' in created_collection.client_connection.last_response_headers
+        continuation1 = created_collection.client_connection.last_response_headers['etag']
+        assert continuation1 != ''
+
+        # Create a document.
Read change feed should return be able to read that document + document_definition = {'pk': 'pk', 'id': 'doc1'} + created_collection.create_item(body=document_definition) + query_iterable = created_collection.query_items_change_feed( + is_start_from_beginning=True, + **filter_param + ) + iter_list = list(query_iterable) + assert len(iter_list) == 1 + assert iter_list[0]['id'] == 'doc1' + assert 'etag' in created_collection.client_connection.last_response_headers + continuation2 = created_collection.client_connection.last_response_headers['etag'] + assert continuation2 != '' + assert continuation2 != continuation1 + + # Create two new documents. Verify that change feed contains the 2 new documents + # with page size 1 and page size 100 + document_definition = {'pk': 'pk', 'id': 'doc2'} + created_collection.create_item(body=document_definition) + document_definition = {'pk': 'pk', 'id': 'doc3'} + created_collection.create_item(body=document_definition) + + for pageSize in [1, 100]: + # verify iterator + query_iterable = created_collection.query_items_change_feed( + continuation=continuation2, + max_item_count=pageSize, + **filter_param + ) + it = query_iterable.__iter__() + expected_ids = 'doc2.doc3.' + actual_ids = '' + for item in it: + actual_ids += item['id'] + '.' + assert actual_ids == expected_ids + + # verify by_page + # the options is not copied, therefore it need to be restored + query_iterable = created_collection.query_items_change_feed( + continuation=continuation2, + max_item_count=pageSize, + **filter_param + ) + count = 0 + expected_count = 2 + all_fetched_res = [] + for page in query_iterable.by_page(): + fetched_res = list(page) + assert len(fetched_res) == min(pageSize, expected_count - count) + count += len(fetched_res) + all_fetched_res.extend(fetched_res) + + actual_ids = '' + for item in all_fetched_res: + actual_ids += item['id'] + '.' 
+ assert actual_ids == expected_ids + + # verify reading change feed from the beginning + query_iterable = created_collection.query_items_change_feed( + is_start_from_beginning=True, + **filter_param + ) + expected_ids = ['doc1', 'doc2', 'doc3'] + it = query_iterable.__iter__() + for i in range(0, len(expected_ids)): + doc = next(it) + assert doc['id'] == expected_ids[i] + assert 'etag' in created_collection.client_connection.last_response_headers + continuation3 = created_collection.client_connection.last_response_headers['etag'] + + # verify reading empty change feed + query_iterable = created_collection.query_items_change_feed( + continuation=continuation3, + is_start_from_beginning=True, + **filter_param + ) + iter_list = list(query_iterable) + assert len(iter_list) == 0 + setup["created_db"].delete_container(created_collection.id) + + def test_query_change_feed_with_start_time(self, setup): + created_collection = setup["created_db"].create_container_if_not_exists("query_change_feed_start_time_test", + PartitionKey(path="/pk")) + batchSize = 50 + + def round_time(): + utc_now = datetime.now(timezone.utc) + return utc_now - timedelta(microseconds=utc_now.microsecond) + def create_random_items(container, batch_size): + for _ in range(batch_size): + # Generate a Random partition key + partition_key = 'pk' + str(uuid.uuid4()) + + # Generate a random item + item = { + 'id': 'item' + str(uuid.uuid4()), + 'partitionKey': partition_key, + 'content': 'This is some random content', + } + + try: + # Create the item in the container + container.upsert_item(item) + except exceptions.CosmosHttpResponseError as e: + fail(e) + + # Create first batch of random items + create_random_items(created_collection, batchSize) + + # wait for 1 second and record the time, then wait another second + sleep(1) + start_time = round_time() + not_utc_time = datetime.now() + sleep(1) + + # now create another batch of items + create_random_items(created_collection, batchSize) + + # now query 
change feed based on start time + change_feed_iter = list(created_collection.query_items_change_feed(start_time=start_time)) + totalCount = len(change_feed_iter) + + # now check if the number of items that were changed match the batch size + assert totalCount == batchSize + + # negative test: pass in a valid time in the future + future_time = start_time + timedelta(hours=1) + change_feed_iter = list(created_collection.query_items_change_feed(start_time=future_time)) + totalCount = len(change_feed_iter) + # A future time should return 0 + assert totalCount == 0 + + # test a date that is not utc, will be converted to utc by sdk + change_feed_iter = list(created_collection.query_items_change_feed(start_time=not_utc_time)) + totalCount = len(change_feed_iter) + # Should equal batch size + assert totalCount == batchSize + + # test an invalid value, Attribute error will be raised for passing non datetime object + invalid_time = "Invalid value" + try: + list(created_collection.query_items_change_feed(start_time=invalid_time)) + fail("Cannot format date on a non datetime object.") + except ValueError as e: #TODO: previously it is throwing AttributeError, now has changed into ValueError, is it breaking change? 
+ assert "Invalid start_time 'Invalid value'" == e.args[0] + + setup["created_db"].delete_container(created_collection.id) + + def test_query_change_feed_with_multi_partition(self, setup): + created_collection = setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()), + PartitionKey(path="/pk"), + offer_throughput=11000) + + # create one doc and make sure change feed query can return the document + new_documents = [ + {'pk': 'pk', 'id': 'doc1'}, + {'pk': 'pk2', 'id': 'doc2'}, + {'pk': 'pk3', 'id': 'doc3'}, + {'pk': 'pk4', 'id': 'doc4'}] + expected_ids = ['doc1', 'doc2', 'doc3', 'doc4'] + + for document in new_documents: + created_collection.create_item(body=document) + + query_iterable = created_collection.query_items_change_feed(start_time="Beginning") + it = query_iterable.__iter__() + actual_ids = [] + for item in it: + actual_ids.append(item['id']) + + assert actual_ids == expected_ids + +if __name__ == "__main__": + unittest.main() diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py new file mode 100644 index 000000000000..2ef61ee5c8a3 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py @@ -0,0 +1,280 @@ +# The MIT License (MIT) +# Copyright (c) Microsoft Corporation. All rights reserved. 
+
+import unittest
+import uuid
+from asyncio import sleep
+from datetime import datetime, timedelta, timezone
+
+import pytest
+import pytest_asyncio
+from _pytest.outcomes import fail
+
+import azure.cosmos.exceptions as exceptions
+import test_config
+from azure.cosmos.aio import CosmosClient
+from azure.cosmos.partition_key import PartitionKey
+
+
+@pytest_asyncio.fixture()
+async def setup():
+    config = test_config.TestConfig()
+    if config.masterKey == '[YOUR_KEY_HERE]' or config.host == '[YOUR_ENDPOINT_HERE]':
+        raise Exception(
+            "You must specify your Azure Cosmos account values for "
+            "'masterKey' and 'host' at the top of this class to run the "
+            "tests.")
+    test_client = CosmosClient(config.host, config.masterKey)
+    created_db = await test_client.create_database_if_not_exists(config.TEST_DATABASE_ID)
+    created_db_data = {
+        "created_db": created_db
+    }
+
+    yield created_db_data
+    await test_client.close()
+
+@pytest.mark.cosmosEmulator
+@pytest.mark.asyncio
+@pytest.mark.usefixtures("setup")
+class TestChangeFeedAsync:
+    """Tests for reading feed ranges and querying the change feed of a container with the async client"""
+
+    async def test_get_feed_ranges(self, setup):
+        created_collection = await setup["created_db"].create_container("get_feed_ranges_" + str(uuid.uuid4()),
+                                                                        PartitionKey(path="/pk"))
+        result = await created_collection.read_feed_ranges()
+        assert len(result) == 1
+
+    @pytest.mark.parametrize("change_feed_filter_param", ["partitionKey", "partitionKeyRangeId", "feedRange"])
+    async def test_query_change_feed_with_different_filter_async(self, change_feed_filter_param, setup):
+
+        created_collection = await setup["created_db"].create_container(
+            "change_feed_test_" + str(uuid.uuid4()),
+            PartitionKey(path="/pk"))
+
+        if change_feed_filter_param == "partitionKey":
+            filter_param = {"partition_key": "pk"}
+        elif change_feed_filter_param == "partitionKeyRangeId":
+            filter_param = {"partition_key_range_id": "0"}
+        elif change_feed_filter_param == "feedRange":
+            feed_ranges = await created_collection.read_feed_ranges()
+            assert len(feed_ranges) == 1
+            filter_param = {"feed_range": feed_ranges[0]}
+        else:
+            filter_param = None
+
+        # Read change feed without passing any options
+        query_iterable = created_collection.query_items_change_feed()
+        iter_list = [item async for item in query_iterable]
+        assert len(iter_list) == 0
+
+        # Read change feed from current should return an empty list
+        query_iterable = created_collection.query_items_change_feed(**filter_param)
+        iter_list = [item async for item in query_iterable]
+        assert len(iter_list) == 0
+        if 'Etag' in created_collection.client_connection.last_response_headers:
+            assert created_collection.client_connection.last_response_headers['Etag'] != ''
+        elif 'etag' in created_collection.client_connection.last_response_headers:
+            assert created_collection.client_connection.last_response_headers['etag'] != ''
+        else:
+            fail("No Etag or etag found in last response headers")
+
+        # Read change feed from beginning should return an empty list
+        query_iterable = created_collection.query_items_change_feed(
+            is_start_from_beginning=True,
+            **filter_param
+        )
+        iter_list = [item async for item in query_iterable]
+        assert len(iter_list) == 0
+        if 'Etag' in created_collection.client_connection.last_response_headers:
+            continuation1 = created_collection.client_connection.last_response_headers['Etag']
+        elif 'etag' in created_collection.client_connection.last_response_headers:
+            continuation1 = created_collection.client_connection.last_response_headers['etag']
+        else:
+            fail("No Etag or etag found in last response headers")
+        assert continuation1 != ''
+
+        # Create a document.
Read change feed should return be able to read that document + document_definition = {'pk': 'pk', 'id': 'doc1'} + await created_collection.create_item(body=document_definition) + query_iterable = created_collection.query_items_change_feed( + is_start_from_beginning=True, + **filter_param + ) + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 1 + assert iter_list[0]['id'] == 'doc1' + if 'Etag' in created_collection.client_connection.last_response_headers: + continuation2 = created_collection.client_connection.last_response_headers['Etag'] + elif 'etag' in created_collection.client_connection.last_response_headers: + continuation2 = created_collection.client_connection.last_response_headers['etag'] + else: + fail("No Etag or etag found in last response headers") + assert continuation2 != '' + assert continuation2 != continuation1 + + # Create two new documents. Verify that change feed contains the 2 new documents + # with page size 1 and page size 100 + document_definition = {'pk': 'pk', 'id': 'doc2'} + await created_collection.create_item(body=document_definition) + document_definition = {'pk': 'pk', 'id': 'doc3'} + await created_collection.create_item(body=document_definition) + + for pageSize in [2, 100]: + # verify iterator + query_iterable = created_collection.query_items_change_feed( + continuation=continuation2, + max_item_count=pageSize, + **filter_param) + it = query_iterable.__aiter__() + expected_ids = 'doc2.doc3.' + actual_ids = '' + async for item in it: + actual_ids += item['id'] + '.' 
+ assert actual_ids == expected_ids + + # verify by_page + # the options is not copied, therefore it need to be restored + query_iterable = created_collection.query_items_change_feed( + continuation=continuation2, + max_item_count=pageSize, + **filter_param + ) + count = 0 + expected_count = 2 + all_fetched_res = [] + pages = query_iterable.by_page() + async for items in await pages.__anext__(): + count += 1 + all_fetched_res.append(items) + assert count == expected_count + + actual_ids = '' + for item in all_fetched_res: + actual_ids += item['id'] + '.' + assert actual_ids == expected_ids + + # verify reading change feed from the beginning + query_iterable = created_collection.query_items_change_feed( + is_start_from_beginning=True, + **filter_param + ) + expected_ids = ['doc1', 'doc2', 'doc3'] + it = query_iterable.__aiter__() + for i in range(0, len(expected_ids)): + doc = await it.__anext__() + assert doc['id'] == expected_ids[i] + if 'Etag' in created_collection.client_connection.last_response_headers: + continuation3 = created_collection.client_connection.last_response_headers['Etag'] + elif 'etag' in created_collection.client_connection.last_response_headers: + continuation3 = created_collection.client_connection.last_response_headers['etag'] + else: + fail("No Etag or etag found in last response headers") + + # verify reading empty change feed + query_iterable = created_collection.query_items_change_feed( + continuation=continuation3, + is_start_from_beginning=True, + **filter_param + ) + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 0 + + await setup["created_db"].delete_container(created_collection.id) + + @pytest.mark.asyncio + async def test_query_change_feed_with_start_time(self, setup): + created_collection = await setup["created_db"].create_container_if_not_exists("query_change_feed_start_time_test", + PartitionKey(path="/pk")) + batchSize = 50 + + def round_time(): + utc_now = datetime.now(timezone.utc) + return 
utc_now - timedelta(microseconds=utc_now.microsecond) + + async def create_random_items(container, batch_size): + for _ in range(batch_size): + # Generate a Random partition key + partition_key = 'pk' + str(uuid.uuid4()) + + # Generate a random item + item = { + 'id': 'item' + str(uuid.uuid4()), + 'partitionKey': partition_key, + 'content': 'This is some random content', + } + + try: + # Create the item in the container + await container.upsert_item(item) + except exceptions.CosmosHttpResponseError as e: + pytest.fail(e) + + # Create first batch of random items + await create_random_items(created_collection, batchSize) + + # wait for 1 second and record the time, then wait another second + await sleep(1) + start_time = round_time() + not_utc_time = datetime.now() + await sleep(1) + + # now create another batch of items + await create_random_items(created_collection, batchSize) + + # now query change feed based on start time + change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=start_time)] + totalCount = len(change_feed_iter) + + # now check if the number of items that were changed match the batch size + assert totalCount == batchSize + + # negative test: pass in a valid time in the future + future_time = start_time + timedelta(hours=1) + change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=future_time)] + totalCount = len(change_feed_iter) + # A future time should return 0 + assert totalCount == 0 + + # test a date that is not utc, will be converted to utc by sdk + change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=not_utc_time)] + totalCount = len(change_feed_iter) + # Should equal batch size + assert totalCount == batchSize + + # test an invalid value, Attribute error will be raised for passing non datetime object + invalid_time = "Invalid value" + try: + change_feed_iter = [i async for i in 
created_collection.query_items_change_feed(start_time=invalid_time)] + fail("Cannot format date on a non datetime object.") + except ValueError as e: + assert ("Invalid start_time 'Invalid value'" == e.args[0]) + + await setup["created_db"].delete_container(created_collection.id) + + async def test_query_change_feed_with_multi_partition_async(self, setup): + created_collection = await setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()), + PartitionKey(path="/pk"), + offer_throughput=11000) + + # create one doc and make sure change feed query can return the document + new_documents = [ + {'pk': 'pk', 'id': 'doc1'}, + {'pk': 'pk2', 'id': 'doc2'}, + {'pk': 'pk3', 'id': 'doc3'}, + {'pk': 'pk4', 'id': 'doc4'}] + expected_ids = ['doc1', 'doc2', 'doc3', 'doc4'] + + for document in new_documents: + await created_collection.create_item(body=document) + + query_iterable = created_collection.query_items_change_feed(start_time="Beginning") + it = query_iterable.__aiter__() + actual_ids = [] + async for item in it: + actual_ids.append(item['id']) + + assert actual_ids == expected_ids + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed_split.py b/sdk/cosmos/azure-cosmos/test/test_change_feed_split.py new file mode 100644 index 000000000000..8ecb7da9cff3 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed_split.py @@ -0,0 +1,81 @@ +# The MIT License (MIT) +# Copyright (c) Microsoft Corporation. All rights reserved. 
+
+import time
+import unittest
+import uuid
+
+import azure.cosmos.cosmos_client as cosmos_client
+import test_config
+from azure.cosmos import DatabaseProxy, PartitionKey
+
+
+class TestPartitionSplitChangeFeed(unittest.TestCase):
+    database: DatabaseProxy = None
+    client: cosmos_client.CosmosClient = None
+    configs = test_config.TestConfig
+    host = configs.host
+    masterKey = configs.masterKey
+    TEST_DATABASE_ID = configs.TEST_DATABASE_ID
+
+    @classmethod
+    def setUpClass(cls):
+        cls.client = cosmos_client.CosmosClient(cls.host, cls.masterKey)
+        cls.database = cls.client.get_database_client(cls.TEST_DATABASE_ID)
+
+    def test_query_change_feed_with_split(self):
+        created_collection = self.database.create_container("change_feed_split_test_" + str(uuid.uuid4()),
+                                                            PartitionKey(path="/pk"),
+                                                            offer_throughput=400)
+
+        # initial change feed query returns empty result
+        query_iterable = created_collection.query_items_change_feed(start_time="Beginning")
+        iter_list = list(query_iterable)
+        assert len(iter_list) == 0
+        continuation = created_collection.client_connection.last_response_headers['etag']
+        assert continuation != ''
+
+        # create one doc and make sure change feed query can return the document
+        document_definition = {'pk': 'pk', 'id': 'doc1'}
+        created_collection.create_item(body=document_definition)
+        query_iterable = created_collection.query_items_change_feed(continuation=continuation)
+        iter_list = list(query_iterable)
+        assert len(iter_list) == 1
+        continuation = created_collection.client_connection.last_response_headers['etag']
+
+        print("Triggering a split in test_query_change_feed_with_split")
+        created_collection.replace_throughput(11000)
+        print("changed offer to 11k")
+        print("--------------------------------")
+        print("Waiting for split to complete")
+        start_time = time.time()
+
+        while True:
+            offer = created_collection.get_throughput()
+            if offer.properties['content'].get('isOfferReplacePending', False):
+                if time.time() - start_time > 60 * 25:  # timeout test at 25 minutes
+                    self.skipTest("Partition split didn't complete in time.")
+                else:
+                    print("Waiting for split to complete")
+                    time.sleep(60)
+            else:
+                break
+
+        print("Split in test_query_change_feed_with_split has completed")
+        print("creating few more documents")
+        new_documents = [{'pk': 'pk2', 'id': 'doc2'}, {'pk': 'pk3', 'id': 'doc3'}, {'pk': 'pk4', 'id': 'doc4'}]
+        expected_ids = ['doc2', 'doc3', 'doc4']
+        for document in new_documents:
+            created_collection.create_item(body=document)
+
+        query_iterable = created_collection.query_items_change_feed(continuation=continuation)
+        it = query_iterable.__iter__()
+        actual_ids = []
+        for item in it:
+            actual_ids.append(item['id'])
+
+        assert actual_ids == expected_ids
+        self.database.delete_container(created_collection.id)
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed_split_async.py b/sdk/cosmos/azure-cosmos/test/test_change_feed_split_async.py
new file mode 100644
index 000000000000..60f7b2810884
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos/test/test_change_feed_split_async.py
@@ -0,0 +1,94 @@
+# The MIT License (MIT)
+# Copyright (c) Microsoft Corporation. All rights reserved.
+
+import asyncio
+import time
+import unittest
+import uuid
+
+import test_config
+from azure.cosmos import PartitionKey
+from azure.cosmos.aio import CosmosClient, DatabaseProxy
+
+
+class TestPartitionSplitChangeFeedAsync(unittest.IsolatedAsyncioTestCase):
+    """Async check that a change feed continuation token remains usable after a partition split."""
+    host = test_config.TestConfig.host
+    masterKey = test_config.TestConfig.masterKey
+    connectionPolicy = test_config.TestConfig.connectionPolicy
+
+    client: CosmosClient = None
+    created_database: DatabaseProxy = None
+
+    TEST_DATABASE_ID = test_config.TestConfig.TEST_DATABASE_ID
+
+    @classmethod
+    def setUpClass(cls):
+        if cls.masterKey == '[YOUR_KEY_HERE]' or cls.host == '[YOUR_ENDPOINT_HERE]':
+            raise Exception(
+                "You must specify your Azure Cosmos account values for "
+                "'masterKey' and 'host' at the top of this class to run the tests.")
+
+    async def asyncSetUp(self):
+        self.client = CosmosClient(self.host, self.masterKey)
+        self.created_database = self.client.get_database_client(self.TEST_DATABASE_ID)
+
+    async def asyncTearDown(self):
+        await self.client.close()
+
+    async def test_query_change_feed_with_split_async(self):
+        """Query the change feed before and after a throughput-triggered split, reusing the continuation."""
+        created_collection = await self.created_database.create_container("change_feed_test_" + str(uuid.uuid4()),
+                                                                          PartitionKey(path="/pk"),
+                                                                          offer_throughput=400)
+
+        try:  # the finally below deletes the container even if the test fails or is skipped
+            # initial change feed query returns empty result
+            query_iterable = created_collection.query_items_change_feed(start_time="Beginning")
+            iter_list = [item async for item in query_iterable]
+            assert len(iter_list) == 0
+            continuation = created_collection.client_connection.last_response_headers['etag']
+            assert continuation != ''
+
+            # create one doc and make sure change feed query can return the document
+            document_definition = {'pk': 'pk', 'id': 'doc1'}
+            await created_collection.create_item(body=document_definition)
+            query_iterable = created_collection.query_items_change_feed(continuation=continuation)
+            iter_list = [item async for item in query_iterable]
+            assert len(iter_list) == 1
+            continuation = created_collection.client_connection.last_response_headers['etag']
+
+            print("Triggering a split in test_query_change_feed_with_split")
+            await created_collection.replace_throughput(11000)
+            print("changed offer to 11k")
+            print("--------------------------------")
+            print("Waiting for split to complete")
+            start_time = time.time()
+
+            while True:
+                offer = await created_collection.get_throughput()
+                if offer.properties['content'].get('isOfferReplacePending', False):
+                    if time.time() - start_time > 60 * 25:  # timeout test at 25 minutes
+                        self.skipTest("Partition split didn't complete in time.")
+                    else:
+                        print("Waiting for split to complete")
+                        await asyncio.sleep(60)  # non-blocking wait; time.sleep would stall the event loop
+                else:
+                    break
+
+            print("Split in test_query_change_feed_with_split has completed")
+            print("creating few more documents")
+            new_documents = [{'pk': 'pk2', 'id': 'doc2'}, {'pk': 'pk3', 'id': 'doc3'}, {'pk': 'pk4', 'id': 'doc4'}]
+            expected_ids = ['doc2', 'doc3', 'doc4']
+            for document in new_documents:
+                await created_collection.create_item(body=document)
+
+            query_iterable = created_collection.query_items_change_feed(continuation=continuation)
+            # change feed order is only guaranteed per partition key, so compare the ids order-insensitively
+            actual_ids = sorted([item['id'] async for item in query_iterable])
+            assert actual_ids == sorted(expected_ids)
+        finally:
+            await self.created_database.delete_container(created_collection.id)
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/sdk/cosmos/azure-cosmos/test/test_container_properties_cache.py b/sdk/cosmos/azure-cosmos/test/test_container_properties_cache.py
index 6ced2c6d0cd9..fbac47dfb215 100644
--- a/sdk/cosmos/azure-cosmos/test/test_container_properties_cache.py
+++ b/sdk/cosmos/azure-cosmos/test/test_container_properties_cache.py
@@ -599,7 +599,7 @@ def test_container_recreate_change_feed(self):
         client.client_connection._CosmosClientConnection__container_properties_cache = copy.deepcopy(old_cache)
 
         # Query change feed for the new items
-        change_feed =
list(created_container.query_items_change_feed()) + change_feed = list(created_container.query_items_change_feed(start_time='Beginning')) self.assertEqual(len(change_feed), 2) # Verify that the change feed contains the new items diff --git a/sdk/cosmos/azure-cosmos/test/test_container_properties_cache_async.py b/sdk/cosmos/azure-cosmos/test/test_container_properties_cache_async.py index 88fd6e20cc14..8cf3b9f39ba0 100644 --- a/sdk/cosmos/azure-cosmos/test/test_container_properties_cache_async.py +++ b/sdk/cosmos/azure-cosmos/test/test_container_properties_cache_async.py @@ -612,7 +612,7 @@ async def test_container_recreate_change_feed(self): client.client_connection._CosmosClientConnection__container_properties_cache = copy.deepcopy(old_cache) # Query change feed for the new items - change_feed = [item async for item in created_container.query_items_change_feed()] + change_feed = [item async for item in created_container.query_items_change_feed(start_time='Beginning')] assert len(change_feed) == 2 # Verify that the change feed contains the new items diff --git a/sdk/cosmos/azure-cosmos/test/test_query.py b/sdk/cosmos/azure-cosmos/test/test_query.py index 2b5a8e5c4590..22a27b3f7e08 100644 --- a/sdk/cosmos/azure-cosmos/test/test_query.py +++ b/sdk/cosmos/azure-cosmos/test/test_query.py @@ -4,8 +4,7 @@ import os import unittest import uuid -from datetime import datetime, timedelta, timezone -from time import sleep + import pytest import azure.cosmos._retry_utility as retry_utility @@ -55,293 +54,6 @@ def test_first_and_last_slashes_trimmed_for_query_string(self): self.assertEqual(iter_list[0]['id'], doc_id) self.created_db.delete_container(created_collection.id) - def test_query_change_feed_with_pk(self): - created_collection = self.created_db.create_container("change_feed_test_" + str(uuid.uuid4()), - PartitionKey(path="/pk")) - # The test targets partition #3 - partition_key = "pk" - - # Read change feed without passing any options - query_iterable = 
created_collection.query_items_change_feed() - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - - # Read change feed from current should return an empty list - query_iterable = created_collection.query_items_change_feed(partition_key=partition_key) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - self.assertNotEqual(created_collection.client_connection.last_response_headers['etag'], '') - - # Read change feed from beginning should return an empty list - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation1 = created_collection.client_connection.last_response_headers['etag'] - self.assertNotEqual(continuation1, '') - - # Create a document. Read change feed should return be able to read that document - document_definition = {'pk': 'pk', 'id': 'doc1'} - created_collection.create_item(body=document_definition) - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 1) - self.assertEqual(iter_list[0]['id'], 'doc1') - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation2 = created_collection.client_connection.last_response_headers['etag'] - self.assertNotEqual(continuation2, '') - self.assertNotEqual(continuation2, continuation1) - - # Create two new documents. 
Verify that change feed contains the 2 new documents - # with page size 1 and page size 100 - document_definition = {'pk': 'pk', 'id': 'doc2'} - created_collection.create_item(body=document_definition) - document_definition = {'pk': 'pk', 'id': 'doc3'} - created_collection.create_item(body=document_definition) - - for pageSize in [1, 100]: - # verify iterator - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - partition_key=partition_key - ) - it = query_iterable.__iter__() - expected_ids = 'doc2.doc3.' - actual_ids = '' - for item in it: - actual_ids += item['id'] + '.' - self.assertEqual(actual_ids, expected_ids) - - # verify by_page - # the options is not copied, therefore it need to be restored - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - partition_key=partition_key - ) - count = 0 - expected_count = 2 - all_fetched_res = [] - for page in query_iterable.by_page(): - fetched_res = list(page) - self.assertEqual(len(fetched_res), min(pageSize, expected_count - count)) - count += len(fetched_res) - all_fetched_res.extend(fetched_res) - - actual_ids = '' - for item in all_fetched_res: - actual_ids += item['id'] + '.' 
- self.assertEqual(actual_ids, expected_ids) - - # verify reading change feed from the beginning - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - expected_ids = ['doc1', 'doc2', 'doc3'] - it = query_iterable.__iter__() - for i in range(0, len(expected_ids)): - doc = next(it) - self.assertEqual(doc['id'], expected_ids[i]) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation3 = created_collection.client_connection.last_response_headers['etag'] - - # verify reading empty change feed - query_iterable = created_collection.query_items_change_feed( - continuation=continuation3, - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.created_db.delete_container(created_collection.id) - - # TODO: partition key range id 0 is relative to the way collection is created - @pytest.mark.skip - def test_query_change_feed_with_pk_range_id(self): - created_collection = self.created_db.create_container("change_feed_test_" + str(uuid.uuid4()), - PartitionKey(path="/pk")) - # The test targets partition #3 - partition_key_range_id = 0 - partitionParam = {"partition_key_range_id": partition_key_range_id} - - # Read change feed without passing any options - query_iterable = created_collection.query_items_change_feed() - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - - # Read change feed from current should return an empty list - query_iterable = created_collection.query_items_change_feed(**partitionParam) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - self.assertNotEqual(created_collection.client_connection.last_response_headers['etag'], '') - - # Read change feed from beginning should return an empty list - query_iterable = 
created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partitionParam - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation1 = created_collection.client_connection.last_response_headers['etag'] - self.assertNotEqual(continuation1, '') - - # Create a document. Read change feed should return be able to read that document - document_definition = {'pk': 'pk', 'id': 'doc1'} - created_collection.create_item(body=document_definition) - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partitionParam - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 1) - self.assertEqual(iter_list[0]['id'], 'doc1') - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation2 = created_collection.client_connection.last_response_headers['etag'] - self.assertNotEqual(continuation2, '') - self.assertNotEqual(continuation2, continuation1) - - # Create two new documents. Verify that change feed contains the 2 new documents - # with page size 1 and page size 100 - document_definition = {'pk': 'pk', 'id': 'doc2'} - created_collection.create_item(body=document_definition) - document_definition = {'pk': 'pk', 'id': 'doc3'} - created_collection.create_item(body=document_definition) - - for pageSize in [1, 100]: - # verify iterator - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - **partitionParam - ) - it = query_iterable.__iter__() - expected_ids = 'doc2.doc3.' - actual_ids = '' - for item in it: - actual_ids += item['id'] + '.' 
- self.assertEqual(actual_ids, expected_ids) - - # verify by_page - # the options is not copied, therefore it need to be restored - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - **partitionParam - ) - count = 0 - expected_count = 2 - all_fetched_res = [] - for page in query_iterable.by_page(): - fetched_res = list(page) - self.assertEqual(len(fetched_res), min(pageSize, expected_count - count)) - count += len(fetched_res) - all_fetched_res.extend(fetched_res) - - actual_ids = '' - for item in all_fetched_res: - actual_ids += item['id'] + '.' - self.assertEqual(actual_ids, expected_ids) - - # verify reading change feed from the beginning - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partitionParam - ) - expected_ids = ['doc1', 'doc2', 'doc3'] - it = query_iterable.__iter__() - for i in range(0, len(expected_ids)): - doc = next(it) - self.assertEqual(doc['id'], expected_ids[i]) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation3 = created_collection.client_connection.last_response_headers['etag'] - - # verify reading empty change feed - query_iterable = created_collection.query_items_change_feed( - continuation=continuation3, - is_start_from_beginning=True, - **partitionParam - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.created_db.delete_container(created_collection.id) - - def test_query_change_feed_with_start_time(self): - created_collection = self.created_db.create_container_if_not_exists("query_change_feed_start_time_test", - PartitionKey(path="/pk")) - batchSize = 50 - - def round_time(): - utc_now = datetime.now(timezone.utc) - return utc_now - timedelta(microseconds=utc_now.microsecond) - def create_random_items(container, batch_size): - for _ in range(batch_size): - # Generate a Random partition key - partition_key = 'pk' + str(uuid.uuid4()) - - 
# Generate a random item - item = { - 'id': 'item' + str(uuid.uuid4()), - 'partitionKey': partition_key, - 'content': 'This is some random content', - } - - try: - # Create the item in the container - container.upsert_item(item) - except exceptions.CosmosHttpResponseError as e: - self.fail(e) - - # Create first batch of random items - create_random_items(created_collection, batchSize) - - # wait for 1 second and record the time, then wait another second - sleep(1) - start_time = round_time() - not_utc_time = datetime.now() - sleep(1) - - # now create another batch of items - create_random_items(created_collection, batchSize) - - # now query change feed based on start time - change_feed_iter = list(created_collection.query_items_change_feed(start_time=start_time)) - totalCount = len(change_feed_iter) - - # now check if the number of items that were changed match the batch size - self.assertEqual(totalCount, batchSize) - - # negative test: pass in a valid time in the future - future_time = start_time + timedelta(hours=1) - change_feed_iter = list(created_collection.query_items_change_feed(start_time=future_time)) - totalCount = len(change_feed_iter) - # A future time should return 0 - self.assertEqual(totalCount, 0) - - # test a date that is not utc, will be converted to utc by sdk - change_feed_iter = list(created_collection.query_items_change_feed(start_time=not_utc_time)) - totalCount = len(change_feed_iter) - # Should equal batch size - self.assertEqual(totalCount, batchSize) - - # test an invalid value, Attribute error will be raised for passing non datetime object - invalid_time = "Invalid value" - try: - change_feed_iter = list(created_collection.query_items_change_feed(start_time=invalid_time)) - self.fail("Cannot format date on a non datetime object.") - except AttributeError as e: - self.assertTrue("'str' object has no attribute 'astimezone'" == e.args[0]) - def test_populate_query_metrics(self): created_collection = 
self.created_db.create_container("query_metrics_test", PartitionKey(path="/pk")) diff --git a/sdk/cosmos/azure-cosmos/test/test_query_async.py b/sdk/cosmos/azure-cosmos/test/test_query_async.py index 0ff171efd5a7..4866b36b75af 100644 --- a/sdk/cosmos/azure-cosmos/test/test_query_async.py +++ b/sdk/cosmos/azure-cosmos/test/test_query_async.py @@ -4,8 +4,7 @@ import os import unittest import uuid -from asyncio import sleep, gather -from datetime import datetime, timedelta, timezone +from asyncio import gather import pytest @@ -14,10 +13,10 @@ import test_config from azure.cosmos import http_constants, _endpoint_discovery_retry_policy from azure.cosmos._execution_context.query_execution_info import _PartitionedQueryExecutionInfo +from azure.cosmos._retry_options import RetryOptions from azure.cosmos.aio import CosmosClient, DatabaseProxy, ContainerProxy from azure.cosmos.documents import _DistinctType from azure.cosmos.partition_key import PartitionKey -from azure.cosmos._retry_options import RetryOptions @pytest.mark.cosmosEmulator @@ -69,329 +68,6 @@ async def test_first_and_last_slashes_trimmed_for_query_string_async(self): await self.created_db.delete_container(created_collection.id) - async def test_query_change_feed_with_pk_async(self): - created_collection = await self.created_db.create_container( - "change_feed_test_" + str(uuid.uuid4()), - PartitionKey(path="/pk")) - # The test targets partition #3 - partition_key = "pk" - - # Read change feed without passing any options - query_iterable = created_collection.query_items_change_feed() - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - - # Read change feed from current should return an empty list - query_iterable = created_collection.query_items_change_feed(partition_key=partition_key) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - if 'Etag' in created_collection.client_connection.last_response_headers: - assert 
created_collection.client_connection.last_response_headers['Etag'] != '' - elif 'etag' in created_collection.client_connection.last_response_headers: - assert created_collection.client_connection.last_response_headers['etag'] != '' - else: - self.fail("No Etag or etag found in last response headers") - - # Read change feed from beginning should return an empty list - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation1 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation1 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - assert continuation1 != '' - - # Create a document. Read change feed should return be able to read that document - document_definition = {'pk': 'pk', 'id': 'doc1'} - await created_collection.create_item(body=document_definition) - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 1 - assert iter_list[0]['id'] == 'doc1' - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation2 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation2 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - assert continuation2 != '' - assert continuation2 != continuation1 - - # Create two new documents. 
Verify that change feed contains the 2 new documents - # with page size 1 and page size 100 - document_definition = {'pk': 'pk', 'id': 'doc2'} - await created_collection.create_item(body=document_definition) - document_definition = {'pk': 'pk', 'id': 'doc3'} - await created_collection.create_item(body=document_definition) - - for pageSize in [2, 100]: - # verify iterator - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - partition_key=partition_key) - it = query_iterable.__aiter__() - expected_ids = 'doc2.doc3.' - actual_ids = '' - async for item in it: - actual_ids += item['id'] + '.' - assert actual_ids == expected_ids - - # verify by_page - # the options is not copied, therefore it need to be restored - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - partition_key=partition_key - ) - count = 0 - expected_count = 2 - all_fetched_res = [] - pages = query_iterable.by_page() - async for items in await pages.__anext__(): - count += 1 - all_fetched_res.append(items) - assert count == expected_count - - actual_ids = '' - for item in all_fetched_res: - actual_ids += item['id'] + '.' 
- assert actual_ids == expected_ids - - # verify reading change feed from the beginning - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - expected_ids = ['doc1', 'doc2', 'doc3'] - it = query_iterable.__aiter__() - for i in range(0, len(expected_ids)): - doc = await it.__anext__() - assert doc['id'] == expected_ids[i] - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation3 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation3 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - - # verify reading empty change feed - query_iterable = created_collection.query_items_change_feed( - continuation=continuation3, - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - - await self.created_db.delete_container(created_collection.id) - - # TODO: partition key range id 0 is relative to the way collection is created - @pytest.mark.skip - async def test_query_change_feed_with_pk_range_id_async(self): - created_collection = await self.created_db.create_container("cf_test_" + str(uuid.uuid4()), - PartitionKey(path="/pk")) - # The test targets partition #3 - partition_key_range_id = 0 - partition_param = {"partition_key_range_id": partition_key_range_id} - - # Read change feed without passing any options - query_iterable = created_collection.query_items_change_feed() - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - - # Read change feed from current should return an empty list - query_iterable = created_collection.query_items_change_feed(**partition_param) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - if 'Etag' 
in created_collection.client_connection.last_response_headers: - assert created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - assert created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - - # Read change feed from beginning should return an empty list - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partition_param - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation1 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation1 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - assert continuation1 != '' - - # Create a document. Read change feed should return be able to read that document - document_definition = {'pk': 'pk', 'id': 'doc1'} - await created_collection.create_item(body=document_definition) - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partition_param - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 1 - assert iter_list[0]['id'] == 'doc1' - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation2 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation2 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - assert continuation2 != '' - assert continuation2 != continuation1 - - # Create two new documents. 
Verify that change feed contains the 2 new documents - # with page size 1 and page size 100 - document_definition = {'pk': 'pk', 'id': 'doc2'} - await created_collection.create_item(body=document_definition) - document_definition = {'pk': 'pk', 'id': 'doc3'} - await created_collection.create_item(body=document_definition) - - for pageSize in [2, 100]: - # verify iterator - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - **partition_param - ) - it = query_iterable.__aiter__() - expected_ids = 'doc2.doc3.' - actual_ids = '' - async for item in it: - actual_ids += item['id'] + '.' - assert actual_ids == expected_ids - - # verify by_page - # the options is not copied, therefore it need to be restored - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - **partition_param - ) - count = 0 - expected_count = 2 - all_fetched_res = [] - pages = query_iterable.by_page() - async for items in await pages.__anext__(): - count += 1 - all_fetched_res.append(items) - assert count == expected_count - - actual_ids = '' - for item in all_fetched_res: - actual_ids += item['id'] + '.' 
- assert actual_ids == expected_ids - - # verify reading change feed from the beginning - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partition_param - ) - expected_ids = ['doc1', 'doc2', 'doc3'] - it = query_iterable.__aiter__() - for i in range(0, len(expected_ids)): - doc = await it.__anext__() - assert doc['id'] == expected_ids[i] - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation3 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation3 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - - # verify reading empty change feed - query_iterable = created_collection.query_items_change_feed( - continuation=continuation3, - is_start_from_beginning=True, - **partition_param - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - - @pytest.mark.asyncio - async def test_query_change_feed_with_start_time(self): - created_collection = await self.created_db.create_container_if_not_exists("query_change_feed_start_time_test", - PartitionKey(path="/pk")) - batchSize = 50 - - def round_time(): - utc_now = datetime.now(timezone.utc) - return utc_now - timedelta(microseconds=utc_now.microsecond) - - async def create_random_items(container, batch_size): - for _ in range(batch_size): - # Generate a Random partition key - partition_key = 'pk' + str(uuid.uuid4()) - - # Generate a random item - item = { - 'id': 'item' + str(uuid.uuid4()), - 'partitionKey': partition_key, - 'content': 'This is some random content', - } - - try: - # Create the item in the container - await container.upsert_item(item) - except exceptions.CosmosHttpResponseError as e: - pytest.fail(e) - - # Create first batch of random items - await create_random_items(created_collection, batchSize) - - # 
wait for 1 second and record the time, then wait another second - await sleep(1) - start_time = round_time() - not_utc_time = datetime.now() - await sleep(1) - - # now create another batch of items - await create_random_items(created_collection, batchSize) - - # now query change feed based on start time - change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=start_time)] - totalCount = len(change_feed_iter) - - # now check if the number of items that were changed match the batch size - assert totalCount == batchSize - - # negative test: pass in a valid time in the future - future_time = start_time + timedelta(hours=1) - change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=future_time)] - totalCount = len(change_feed_iter) - # A future time should return 0 - assert totalCount == 0 - - # test a date that is not utc, will be converted to utc by sdk - change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=not_utc_time)] - totalCount = len(change_feed_iter) - # Should equal batch size - assert totalCount == batchSize - - # test an invalid value, Attribute error will be raised for passing non datetime object - invalid_time = "Invalid value" - try: - change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=invalid_time)] - self.fail("Cannot format date on a non datetime object.") - except AttributeError as e: - assert ("'str' object has no attribute 'astimezone'" == e.args[0]) - - await self.created_db.delete_container(created_collection.id) - @pytest.mark.asyncio async def test_populate_query_metrics_async(self): created_collection = await self.created_db.create_container(