Skip to content

Commit

Permalink
[ONCALL #6352] stripe RFR pagination issue (#44862)
Browse files Browse the repository at this point in the history
  • Loading branch information
maxi297 authored Aug 30, 2024
1 parent 89e890d commit ceedfa3
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: e094cb9a-26de-4645-8761-65c0c425d1de
dockerImageTag: 5.5.1
dockerImageTag: 5.5.2
dockerRepository: airbyte/source-stripe
documentationUrl: https://docs.airbyte.com/integrations/sources/stripe
erdUrl: https://dbdocs.io/airbyteio/source-stripe?view=relationships
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
build-backend = "poetry.core.masonry.api"

[tool.poetry]
version = "5.5.1"
version = "5.5.2"
name = "source-stripe"
description = "Source implementation for Stripe."
authors = [ "Airbyte <contact@airbyte.io>",]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
from airbyte_cdk import BackoffStrategy
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies import ExponentialBackoffStrategy
from airbyte_cdk.sources.streams.checkpoint import Cursor
from airbyte_cdk.sources.streams.checkpoint.resumable_full_refresh_cursor import ResumableFullRefreshCursor
from airbyte_cdk.sources.streams.checkpoint.substream_resumable_full_refresh_cursor import SubstreamResumableFullRefreshCursor
from airbyte_cdk.sources.streams.core import StreamData
from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream
from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler
Expand Down Expand Up @@ -208,6 +211,21 @@ def retry_factor(self) -> float:
"""
return 0 if IS_TESTING else super(StripeStream, self).retry_factor

def get_cursor(self) -> Optional[Cursor]:
    """
    Return the parent's cursor, unless it is a resumable full refresh (RFR) cursor.

    RFR is breaking the pagination in Stripe today. The stream is instantiated using the stream facade and, during
    the read, goes through the concurrent code so that full refresh streams can be read concurrently. However, as
    there are no cursors and `read_records` is the one from the HttpStream, a ResumableFullRefreshCursor ends up
    being assigned and hence only a single page is read.
    In order to avoid that, we act as if there were no cursor when the cursor is an RFR one.
    """
    inherited_cursor = super().get_cursor()
    rfr_cursor_types = (ResumableFullRefreshCursor, SubstreamResumableFullRefreshCursor)
    # Hiding the RFR cursor is what keeps the regular pagination loop in charge of the read.
    return None if isinstance(inherited_cursor, rfr_cursor_types) else inherited_cursor


class IStreamSelector(ABC):
@abstractmethod
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.

from datetime import datetime, timedelta, timezone
from unittest import TestCase

import freezegun
from airbyte_cdk.test.catalog_builder import CatalogBuilder
from airbyte_cdk.test.entrypoint_wrapper import read
from airbyte_cdk.test.mock_http import HttpMocker
from airbyte_cdk.test.mock_http.response_builder import (
FieldPath,
HttpResponseBuilder,
RecordBuilder,
create_record_builder,
create_response_builder,
find_template,
)
from airbyte_cdk.test.state_builder import StateBuilder
from airbyte_protocol.models import ConfiguredAirbyteCatalog, SyncMode
from integration.config import ConfigBuilder
from integration.pagination import StripePaginationStrategy
from integration.request_builder import StripeRequestBuilder
from source_stripe import SourceStripe

# Stream under test; also used to look up the response/record template.
_STREAM_NAME = "accounts"
_ACCOUNT_ID = "acct_1G9HZLIEn49ers"
_CLIENT_SECRET = "ConfigBuilder default client secret"
# Captured once at import time; the test class freezes the clock at this instant.
_NOW = datetime.now(tz=timezone.utc)
# Raw connector configuration handed to the source and to `read`.
_CONFIG = dict(client_secret=_CLIENT_SECRET, account_id=_ACCOUNT_ID)
# State built from an empty StateBuilder, i.e. no prior state.
_NO_STATE = StateBuilder().build()
_AVOIDING_INCLUSIVE_BOUNDARIES = timedelta(seconds=1)


def _create_config() -> ConfigBuilder:
    """Return a ConfigBuilder pre-populated with the test account id and client secret."""
    builder = ConfigBuilder()
    builder = builder.with_account_id(_ACCOUNT_ID)
    return builder.with_client_secret(_CLIENT_SECRET)


def _create_catalog(sync_mode: SyncMode = SyncMode.full_refresh) -> ConfiguredAirbyteCatalog:
    """
    Build a configured catalog containing only the stream under test.

    :param sync_mode: sync mode to configure the stream with; defaults to full refresh.
    :return: the configured catalog produced by CatalogBuilder.
    """
    # Use the shared constant rather than a repeated literal so the catalog always
    # targets the same stream as the request/response templates.
    return CatalogBuilder().with_stream(name=_STREAM_NAME, sync_mode=sync_mode).build()


def _create_accounts_request() -> StripeRequestBuilder:
    """Return a request builder for the accounts endpoint using the test account id and client secret."""
    accounts_request = StripeRequestBuilder.accounts_endpoint(_ACCOUNT_ID, _CLIENT_SECRET)
    return accounts_request


def _create_response() -> HttpResponseBuilder:
    """Return a response builder based on the stream's template, with records under "data" and Stripe pagination."""
    template = find_template(_STREAM_NAME, __file__)
    records_location = FieldPath("data")
    paginator = StripePaginationStrategy()
    return create_response_builder(
        response_template=template,
        records_path=records_location,
        pagination_strategy=paginator,
    )


def _create_record() -> RecordBuilder:
    """Return a record builder based on the stream's template, with records under "data" and the id at "id"."""
    template = find_template(_STREAM_NAME, __file__)
    records_location = FieldPath("data")
    id_location = FieldPath("id")
    return create_record_builder(template, records_location, record_id_path=id_location)


@freezegun.freeze_time(_NOW.isoformat())
class AccountsTest(TestCase):
    """Full refresh reads of the accounts stream against mocked HTTP responses."""

    def _read_accounts(self):
        """Instantiate the source with no state and run a read over the default (full refresh) catalog."""
        stripe_source = SourceStripe(config=_CONFIG, catalog=_create_catalog(), state=_NO_STATE)
        return read(stripe_source, config=_CONFIG, catalog=_create_catalog())

    @HttpMocker()
    def test_full_refresh(self, http_mocker: HttpMocker) -> None:
        """A single page holding one record yields exactly one record."""
        request = _create_accounts_request().with_limit(100).build()
        response = _create_response().with_record(record=_create_record()).build()
        http_mocker.get(request, response)

        output = self._read_accounts()

        assert len(output.records) == 1

    @HttpMocker()
    def test_pagination(self, http_mocker: HttpMocker) -> None:
        """Two pages of one record each are both read, yielding two records."""
        # First page: flagged as paginated, and its record id is what the next request must start after.
        first_page_request = _create_accounts_request().with_limit(100).build()
        first_page_response = (
            _create_response()
            .with_record(record=_create_record().with_id("last_record_id_from_first_page"))
            .with_pagination()
            .build()
        )
        http_mocker.get(first_page_request, first_page_response)

        # Second page: requested with starting_after set to the id from the first page.
        second_page_request = (
            _create_accounts_request().with_limit(100).with_starting_after("last_record_id_from_first_page").build()
        )
        second_page_response = _create_response().with_record(record=_create_record()).build()
        http_mocker.get(second_page_request, second_page_response)

        output = self._read_accounts()

        assert len(output.records) == 2
Loading

0 comments on commit ceedfa3

Please sign in to comment.