Skip to content

Commit c63223a

Browse files
feat: Replace HTTP downloads with sparse git clone for metadata schemas
- Switch from downloading individual YAML files via GitHub API to using sparse git clone
- Eliminates rate limiting issues (60 req/hour -> no API calls)
- Fix single-file model generation to properly filter out relative imports
- Add multi-line import block detection and filtering
- Generate and commit metadata models and consolidated JSON schema artifacts

Co-Authored-By: AJ Steers <aj@airbyte.io>
1 parent 07d7014 commit c63223a

File tree

4 files changed

+2130
-113
lines changed

4 files changed

+2130
-113
lines changed

airbyte_cdk/sources/declarative/models/declarative_component_schema.py

Lines changed: 86 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,3 @@
1-
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2-
31
# generated by datamodel-codegen:
42
# filename: declarative_component_schema.yaml
53

@@ -928,24 +926,28 @@ class OAuthConfigSpecification(BaseModel):
928926
class Config:
929927
extra = Extra.allow
930928

931-
oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = Field(
932-
None,
933-
description="OAuth specific blob. This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }",
934-
examples=[
935-
{"app_id": {"type": "string", "path_in_connector_config": ["app_id"]}},
936-
{
937-
"app_id": {
938-
"type": "string",
939-
"path_in_connector_config": ["info", "app_id"],
940-
}
941-
},
942-
],
943-
title="OAuth user input",
929+
oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = (
930+
Field(
931+
None,
932+
description="OAuth specific blob. This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }",
933+
examples=[
934+
{"app_id": {"type": "string", "path_in_connector_config": ["app_id"]}},
935+
{
936+
"app_id": {
937+
"type": "string",
938+
"path_in_connector_config": ["info", "app_id"],
939+
}
940+
},
941+
],
942+
title="OAuth user input",
943+
)
944944
)
945-
oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = Field(
946-
None,
947-
description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{{my_var}}`.\n- The nested resolution variables like `{{ {{my_nested_var}} }}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {{ {{my_var_a}}:{{my_var_b}} | base64Encoder }}\n + base64Decorer - decode from `base64` encoded string, {{ {{my_string_variable_or_string_value}} | base64Decoder }}\n + urlEncoder - encode the input string to URL-like format, {{ https://test.host.com/endpoint | urlEncoder}}\n + urlDecorer - decode the input url-encoded string into text format, {{ urlDecoder:https%3A%2F%2Fairbyte.io | urlDecoder}}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {{ {{state_value}} | codeChallengeS256 }}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{ {{redirect_uri_value}} | urlEncoder}}&{{state_key}}={{state_value}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{{ auth_code_key }}": "{{ auth_code_value }}",\n "{{ client_id_key }}": "{{ client_id_value }}",\n "{{ client_secret_key }}": "{{ client_secret_value }}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }',
948-
title="DeclarativeOAuth Connector Specification",
945+
oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = (
946+
Field(
947+
None,
948+
description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{{my_var}}`.\n- The nested resolution variables like `{{ {{my_nested_var}} }}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {{ {{my_var_a}}:{{my_var_b}} | base64Encoder }}\n + base64Decorer - decode from `base64` encoded string, {{ {{my_string_variable_or_string_value}} | base64Decoder }}\n + urlEncoder - encode the input string to URL-like format, {{ https://test.host.com/endpoint | urlEncoder}}\n + urlDecorer - decode the input url-encoded string into text format, {{ urlDecoder:https%3A%2F%2Fairbyte.io | urlDecoder}}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {{ {{state_value}} | codeChallengeS256 }}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{ {{redirect_uri_value}} | urlEncoder}}&{{state_key}}={{state_value}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{{ auth_code_key }}": "{{ auth_code_value }}",\n "{{ client_id_key }}": "{{ client_id_value }}",\n "{{ client_secret_key }}": "{{ client_secret_value }}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }',
949+
title="DeclarativeOAuth Connector Specification",
950+
)
949951
)
950952
complete_oauth_output_specification: Optional[Dict[str, Any]] = Field(
951953
None,
@@ -963,7 +965,9 @@ class Config:
963965
complete_oauth_server_input_specification: Optional[Dict[str, Any]] = Field(
964966
None,
965967
description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }",
966-
examples=[{"client_id": {"type": "string"}, "client_secret": {"type": "string"}}],
968+
examples=[
969+
{"client_id": {"type": "string"}, "client_secret": {"type": "string"}}
970+
],
967971
title="OAuth input specification",
968972
)
969973
complete_oauth_server_output_specification: Optional[Dict[str, Any]] = Field(
@@ -1467,7 +1471,9 @@ class CustomConfigTransformation(BaseModel):
14671471
class_name: str = Field(
14681472
...,
14691473
description="Fully-qualified name of the class that will be implementing the custom config transformation. The format is `source_<name>.<package>.<class_name>`.",
1470-
examples=["source_declarative_manifest.components.MyCustomConfigTransformation"],
1474+
examples=[
1475+
"source_declarative_manifest.components.MyCustomConfigTransformation"
1476+
],
14711477
)
14721478
parameters: Optional[Dict[str, Any]] = Field(
14731479
None,
@@ -1885,7 +1891,9 @@ class OAuthAuthenticator(BaseModel):
18851891
scopes: Optional[List[str]] = Field(
18861892
None,
18871893
description="List of scopes that should be granted to the access token.",
1888-
examples=[["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]],
1894+
examples=[
1895+
["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]
1896+
],
18891897
title="Scopes",
18901898
)
18911899
token_expiry_date: Optional[str] = Field(
@@ -2084,7 +2092,9 @@ class RecordSelector(BaseModel):
20842092
description="Responsible for filtering records to be emitted by the Source.",
20852093
title="Record Filter",
20862094
)
2087-
schema_normalization: Optional[Union[SchemaNormalization, CustomSchemaNormalization]] = Field(
2095+
schema_normalization: Optional[
2096+
Union[SchemaNormalization, CustomSchemaNormalization]
2097+
] = Field(
20882098
None,
20892099
description="Responsible for normalization according to the schema.",
20902100
title="Schema Normalization",
@@ -2126,10 +2136,12 @@ class DpathValidator(BaseModel):
21262136
],
21272137
title="Field Path",
21282138
)
2129-
validation_strategy: Union[ValidateAdheresToSchema, CustomValidationStrategy] = Field(
2130-
...,
2131-
description="The condition that the specified config value will be evaluated against",
2132-
title="Validation Strategy",
2139+
validation_strategy: Union[ValidateAdheresToSchema, CustomValidationStrategy] = (
2140+
Field(
2141+
...,
2142+
description="The condition that the specified config value will be evaluated against",
2143+
title="Validation Strategy",
2144+
)
21332145
)
21342146

21352147

@@ -2146,10 +2158,12 @@ class PredicateValidator(BaseModel):
21462158
],
21472159
title="Value",
21482160
)
2149-
validation_strategy: Union[ValidateAdheresToSchema, CustomValidationStrategy] = Field(
2150-
...,
2151-
description="The validation strategy to apply to the value.",
2152-
title="Validation Strategy",
2161+
validation_strategy: Union[ValidateAdheresToSchema, CustomValidationStrategy] = (
2162+
Field(
2163+
...,
2164+
description="The validation strategy to apply to the value.",
2165+
title="Validation Strategy",
2166+
)
21532167
)
21542168

21552169

@@ -2174,12 +2188,12 @@ class ConfigAddFields(BaseModel):
21742188

21752189
class CompositeErrorHandler(BaseModel):
21762190
type: Literal["CompositeErrorHandler"]
2177-
error_handlers: List[Union[CompositeErrorHandler, DefaultErrorHandler, CustomErrorHandler]] = (
2178-
Field(
2179-
...,
2180-
description="List of error handlers to iterate on to determine how to handle a failed response.",
2181-
title="Error Handlers",
2182-
)
2191+
error_handlers: List[
2192+
Union[CompositeErrorHandler, DefaultErrorHandler, CustomErrorHandler]
2193+
] = Field(
2194+
...,
2195+
description="List of error handlers to iterate on to determine how to handle a failed response.",
2196+
title="Error Handlers",
21832197
)
21842198
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
21852199

@@ -2341,9 +2355,9 @@ class Config:
23412355

23422356
type: Literal["DeclarativeSource"]
23432357
check: Union[CheckStream, CheckDynamicStream]
2344-
streams: Optional[List[Union[ConditionalStreams, DeclarativeStream, StateDelegatingStream]]] = (
2345-
None
2346-
)
2358+
streams: Optional[
2359+
List[Union[ConditionalStreams, DeclarativeStream, StateDelegatingStream]]
2360+
] = None
23472361
dynamic_streams: List[DynamicDeclarativeStream]
23482362
version: str = Field(
23492363
...,
@@ -2468,16 +2482,20 @@ class Config:
24682482
extra = Extra.allow
24692483

24702484
type: Literal["DeclarativeStream"]
2471-
name: Optional[str] = Field("", description="The stream name.", example=["Users"], title="Name")
2485+
name: Optional[str] = Field(
2486+
"", description="The stream name.", example=["Users"], title="Name"
2487+
)
24722488
retriever: Union[SimpleRetriever, AsyncRetriever, CustomRetriever] = Field(
24732489
...,
24742490
description="Component used to coordinate how records are extracted across stream slices and request pages.",
24752491
title="Retriever",
24762492
)
2477-
incremental_sync: Optional[Union[DatetimeBasedCursor, IncrementingCountCursor]] = Field(
2478-
None,
2479-
description="Component used to fetch data incrementally based on a time field in the data.",
2480-
title="Incremental Sync",
2493+
incremental_sync: Optional[Union[DatetimeBasedCursor, IncrementingCountCursor]] = (
2494+
Field(
2495+
None,
2496+
description="Component used to fetch data incrementally based on a time field in the data.",
2497+
title="Incremental Sync",
2498+
)
24812499
)
24822500
primary_key: Optional[PrimaryKey] = Field("", title="Primary Key")
24832501
schema_loader: Optional[
@@ -2651,18 +2669,20 @@ class HttpRequester(BaseModelWithDeprecations):
26512669
description="For APIs that require explicit specification of the properties to query for, this component will take a static or dynamic set of properties (which can be optionally split into chunks) and allow them to be injected into an outbound request by accessing stream_partition.extra_fields.",
26522670
title="Query Properties",
26532671
)
2654-
request_parameters: Optional[Union[Dict[str, Union[str, QueryProperties]], str]] = Field(
2655-
None,
2656-
description="Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.",
2657-
examples=[
2658-
{"unit": "day"},
2659-
{
2660-
"query": 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"'
2661-
},
2662-
{"searchIn": "{{ ','.join(config.get('search_in', [])) }}"},
2663-
{"sort_by[asc]": "updated_at"},
2664-
],
2665-
title="Query Parameters",
2672+
request_parameters: Optional[Union[Dict[str, Union[str, QueryProperties]], str]] = (
2673+
Field(
2674+
None,
2675+
description="Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.",
2676+
examples=[
2677+
{"unit": "day"},
2678+
{
2679+
"query": 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"'
2680+
},
2681+
{"searchIn": "{{ ','.join(config.get('search_in', [])) }}"},
2682+
{"sort_by[asc]": "updated_at"},
2683+
],
2684+
title="Query Parameters",
2685+
)
26662686
)
26672687
request_headers: Optional[Union[Dict[str, str], str]] = Field(
26682688
None,
@@ -2834,7 +2854,9 @@ class QueryProperties(BaseModel):
28342854

28352855
class StateDelegatingStream(BaseModel):
28362856
type: Literal["StateDelegatingStream"]
2837-
name: str = Field(..., description="The stream name.", example=["Users"], title="Name")
2857+
name: str = Field(
2858+
..., description="The stream name.", example=["Users"], title="Name"
2859+
)
28382860
full_refresh_stream: DeclarativeStream = Field(
28392861
...,
28402862
description="Component used to coordinate how records are extracted across stream slices and request pages when the state is empty or not provided.",
@@ -2921,13 +2943,17 @@ class AsyncRetriever(BaseModel):
29212943
status_extractor: Union[DpathExtractor, CustomRecordExtractor] = Field(
29222944
..., description="Responsible for fetching the actual status of the async job."
29232945
)
2924-
download_target_extractor: Optional[Union[DpathExtractor, CustomRecordExtractor]] = Field(
2946+
download_target_extractor: Optional[
2947+
Union[DpathExtractor, CustomRecordExtractor]
2948+
] = Field(
29252949
None,
29262950
description="Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.",
29272951
)
29282952
download_extractor: Optional[
29292953
Union[DpathExtractor, CustomRecordExtractor, ResponseToFileExtractor]
2930-
] = Field(None, description="Responsible for fetching the records from provided urls.")
2954+
] = Field(
2955+
None, description="Responsible for fetching the records from provided urls."
2956+
)
29312957
creation_requester: Union[HttpRequester, CustomRequester] = Field(
29322958
...,
29332959
description="Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.",

0 commit comments

Comments
 (0)