forked from airbytehq/airbyte
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[per-stream cdk] Support deserialization of legacy and per-stream state (airbytehq#16205)
* interpret legacy and new per-stream format into AirbyteStateMessages * add ConnectorStateManager stubs for future work * remove frozen for the time being until we need to hash descriptors * add validation that AirbyteStateMessage has at least one of stream, global, or data fields * pr feedback and clean up of the code * remove changes to airbyte_protocol and perform validation in read_state() * fix import formatting
- Loading branch information
Showing
5 changed files
with
322 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
54 changes: 54 additions & 0 deletions
54
airbyte-cdk/python/airbyte_cdk/sources/connector_state_manager.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# | ||
# Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
import copy | ||
from typing import Any, List, Mapping, MutableMapping, Union | ||
|
||
from airbyte_cdk.models import AirbyteStateBlob, AirbyteStateMessage, AirbyteStateType | ||
|
||
|
||
class ConnectorStateManager:
    """
    ConnectorStateManager consolidates the various forms of a stream's incoming state message (STREAM / GLOBAL / LEGACY) under a common
    interface. It also provides methods to extract and update state.

    For now only the legacy state is persisted (it is what abstract_source.read() consumes). Initialization from the new per-stream
    interface received from the platform is left to subsequent PRs.
    """

    def __init__(self, state: Union[List[AirbyteStateMessage], MutableMapping[str, Any], None] = None):
        """
        Store the legacy state carried by the incoming state object.

        :param state: One of: a falsy value (None, empty list, empty mapping), which yields an empty legacy state; a list holding
            exactly one AirbyteStateMessage of type LEGACY; or a mutable mapping in the original legacy JSON object format.
        :raises ValueError: If state is non-empty and matches neither of the supported shapes (this includes lists that hold more
            than one message or a message that is not of type LEGACY).
        """
        if not state:
            self.legacy = {}
        elif self.is_migrated_legacy_state(state):
            # The legacy state format received from the platform is parsed and stored as a single AirbyteStateMessage when reading
            # the file. This is used for input backwards compatibility. A LEGACY message without a data payload is treated as an
            # empty state so that get_legacy_state() always returns a mapping.
            self.legacy = state[0].data or {}
        elif isinstance(state, MutableMapping):
            # In the event that legacy state comes in as its original JSON object format, no changes to the input need to be made
            self.legacy = state
        else:
            raise ValueError("Input state should come in the form of list of Airbyte state messages or a mapping of states")

    def get_stream_state(self, namespace: str, stream_name: str) -> AirbyteStateBlob:
        """
        Placeholder for retrieving the state of a single stream. Not implemented yet: currently returns None.

        :param namespace: Namespace of the stream
        :param stream_name: Name of the stream
        """
        # todo implement in upcoming PRs
        pass

    def get_legacy_state(self) -> MutableMapping[str, Any]:
        """
        Returns a deep copy of the current legacy state dictionary made up of the state of all streams for a connector
        :return: A copy of the legacy state
        """
        return copy.deepcopy(self.legacy)

    def update_state_for_stream(self, namespace: str, stream_name: str, value: Mapping[str, Any]):
        """
        Placeholder for updating the state of a single stream. Not implemented yet: currently a no-op.

        :param namespace: Namespace of the stream
        :param stream_name: Name of the stream
        :param value: New state value for the stream
        """
        # todo implement in upcoming PRs
        pass

    @staticmethod
    def is_migrated_legacy_state(state: Union[List[AirbyteStateMessage], MutableMapping[str, Any], None]) -> bool:
        """
        Tell whether state is a legacy state that was wrapped into a single AirbyteStateMessage.

        :param state: The incoming state object; any value is accepted
        :return: True only when state is a list containing exactly one AirbyteStateMessage whose type is LEGACY
        """
        return (
            isinstance(state, list)
            and len(state) == 1
            and isinstance(state[0], AirbyteStateMessage)
            and state[0].type == AirbyteStateType.LEGACY
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
39 changes: 39 additions & 0 deletions
39
airbyte-cdk/python/unit_tests/sources/test_connector_state_manager.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# | ||
# Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
from contextlib import nullcontext as does_not_raise | ||
|
||
import pytest | ||
from airbyte_cdk.models import AirbyteStateMessage, AirbyteStateType | ||
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager | ||
|
||
|
||
@pytest.mark.parametrize(
    "input_state, expected_legacy_state, expected_error",
    [
        # A single LEGACY AirbyteStateMessage is unwrapped to its data payload
        pytest.param(
            [AirbyteStateMessage(type=AirbyteStateType.LEGACY, data={"actresses": {"id": "seehorn_rhea"}})],
            {"actresses": {"id": "seehorn_rhea"}},
            does_not_raise(),
            id="test_legacy_input_state",
        ),
        # A plain JSON-object style mapping is kept as the legacy state
        pytest.param(
            {"actors": {"created_at": "1962-10-22"}, "actresses": {"id": "seehorn_rhea"}},
            {"actors": {"created_at": "1962-10-22"}, "actresses": {"id": "seehorn_rhea"}},
            does_not_raise(),
            id="test_supports_legacy_json_blob",
        ),
        # Empty inputs fall back to an empty legacy state
        pytest.param(
            {},
            {},
            does_not_raise(),
            id="test_initialize_empty_mapping_by_default",
        ),
        pytest.param(
            [],
            {},
            does_not_raise(),
            id="test_initialize_empty_state",
        ),
        # Anything that is neither a list of state messages nor a mapping is rejected
        pytest.param(
            "strings_are_not_allowed",
            None,
            pytest.raises(ValueError),
            id="test_value_error_is_raised_on_invalid_state_input",
        ),
    ],
)
def test_get_legacy_state(input_state, expected_legacy_state, expected_error):
    """
    Build a ConnectorStateManager from input_state and check the legacy state it reports, or that construction raises
    when expected_error is a pytest.raises context.
    """
    with expected_error:
        assert ConnectorStateManager(input_state).get_legacy_state() == expected_legacy_state
Oops, something went wrong.