Skip to content

Commit 7319974

Browse files
committed
Fix mypy and add recursive transformation
1 parent 453580d commit 7319974

File tree

4 files changed

+40
-22
lines changed

4 files changed

+40
-22
lines changed

airbyte_cdk/sources/declarative/declarative_component_schema.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,7 +1234,7 @@ definitions:
12341234
- "$ref": "#/definitions/CustomTransformation"
12351235
- "$ref": "#/definitions/RemoveFields"
12361236
- "$ref": "#/definitions/KeysToLower"
1237-
- "$ref": "#/definitions/KeyToSnakeCase"
1237+
- "$ref": "#/definitions/KeysToSnakeCase"
12381238
state_migrations:
12391239
title: State Migrations
12401240
description: Array of state migrations to be applied on the input state
@@ -1839,7 +1839,7 @@ definitions:
18391839
$parameters:
18401840
type: object
18411841
additionalProperties: true
1842-
KeyToSnakeCase:
1842+
KeysToSnakeCase:
18431843
title: Key to Snake Case
18441844
description: A transformation that renames all keys to snake case.
18451845
type: object
@@ -1848,7 +1848,7 @@ definitions:
18481848
properties:
18491849
type:
18501850
type: string
1851-
enum: [KeyToSnakeCase]
1851+
enum: [KeysToSnakeCase]
18521852
$parameters:
18531853
type: object
18541854
additionalProperties: true

airbyte_cdk/sources/declarative/models/declarative_component_schema.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -710,8 +710,8 @@ class KeysToLower(BaseModel):
710710
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
711711

712712

713-
class KeyToSnakeCase(BaseModel):
714-
type: Literal["KeyToSnakeCase"]
713+
class KeysToSnakeCase(BaseModel):
714+
type: Literal["KeysToSnakeCase"]
715715
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
716716

717717

@@ -1665,7 +1665,7 @@ class Config:
16651665
CustomTransformation,
16661666
RemoveFields,
16671667
KeysToLower,
1668-
KeyToSnakeCase,
1668+
KeysToSnakeCase,
16691669
]
16701670
]
16711671
] = Field(

airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,19 @@
44

55
import re
66
from dataclasses import dataclass
7-
from typing import Any, Dict, Optional
7+
from typing import Any, Dict, List, Optional
88

99
import unidecode
1010

1111
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
1212
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
1313

14-
TOKEN_PATTERN = re.compile(r"[A-Z]+[a-z]*|[a-z]+|\d+|(?P<NoToken>[^a-zA-Z\d]+)")
15-
DEFAULT_SEPARATOR = "_"
16-
1714

1815
@dataclass
19-
class KeyToSnakeCaseTransformation(RecordTransformation):
20-
token_pattern: re.Pattern = TOKEN_PATTERN
16+
class KeysToSnakeCaseTransformation(RecordTransformation):
17+
token_pattern: re.Pattern[str] = re.compile(
18+
r"[A-Z]+[a-z]*|[a-z]+|\d+|(?P<NoToken>[^a-zA-Z\d]+)"
19+
)
2120

2221
def transform(
2322
self,
@@ -26,13 +25,22 @@ def transform(
2625
stream_state: Optional[StreamState] = None,
2726
stream_slice: Optional[StreamSlice] = None,
2827
) -> None:
29-
transformed_record = {}
30-
for key in record:
31-
transformed_key = self.process_key(key)
32-
transformed_record[transformed_key] = record[key]
28+
transformed_record = self._transform_record(record)
3329
record.clear()
3430
record.update(transformed_record)
3531

32+
def _transform_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
33+
transformed_record = {}
34+
for key, value in record.items():
35+
transformed_key = self.process_key(key)
36+
transformed_value = value
37+
38+
if isinstance(value, dict):
39+
transformed_value = self._transform_record(value)
40+
41+
transformed_record[transformed_key] = transformed_value
42+
return transformed_record
43+
3644
def process_key(self, key: str) -> str:
3745
key = self.normalize_key(key)
3846
tokens = self.tokenize_key(key)
@@ -42,19 +50,19 @@ def process_key(self, key: str) -> str:
4250
def normalize_key(self, key: str) -> str:
4351
return unidecode.unidecode(key)
4452

45-
def tokenize_key(self, key: str) -> list:
53+
def tokenize_key(self, key: str) -> List[str]:
4654
tokens = []
4755
for match in self.token_pattern.finditer(key):
4856
token = match.group(0) if match.group("NoToken") is None else ""
4957
tokens.append(token)
5058
return tokens
5159

52-
def filter_tokens(self, tokens: list) -> list:
60+
def filter_tokens(self, tokens: List[str]) -> List[str]:
5361
if len(tokens) >= 3:
5462
tokens = tokens[:1] + [t for t in tokens[1:-1] if t] + tokens[-1:]
5563
if tokens and tokens[0].isdigit():
5664
tokens.insert(0, "")
5765
return tokens
5866

59-
def tokens_to_snake_case(self, tokens: list) -> str:
67+
def tokens_to_snake_case(self, tokens: List[str]) -> str:
6068
return "_".join(token.lower() for token in tokens)

unit_tests/sources/declarative/transformations/test_keys_to_snake_transformation.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import pytest
66

77
from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
8-
KeyToSnakeCaseTransformation,
8+
KeysToSnakeCaseTransformation,
99
)
1010

1111
_ANY_VALUE = -1
@@ -22,6 +22,16 @@
2222
{"123Number": _ANY_VALUE, "456Another123": _ANY_VALUE},
2323
{"_123_number": _ANY_VALUE, "_456_another_123": _ANY_VALUE},
2424
),
25+
(
26+
{
27+
"NestedRecord": {"FirstName": _ANY_VALUE, "lastName": _ANY_VALUE},
28+
"456Another123": _ANY_VALUE,
29+
},
30+
{
31+
"nested_record": {"first_name": _ANY_VALUE, "last_name": _ANY_VALUE},
32+
"_456_another_123": _ANY_VALUE,
33+
},
34+
),
2535
(
2636
{"hello@world": _ANY_VALUE, "test#case": _ANY_VALUE},
2737
{"hello_world": _ANY_VALUE, "test_case": _ANY_VALUE},
@@ -43,6 +53,6 @@
4353
),
4454
],
4555
)
46-
def test_key_transformation(input_keys, expected_keys):
47-
KeyToSnakeCaseTransformation().transform(input_keys)
56+
def test_keys_transformation(input_keys, expected_keys):
57+
KeysToSnakeCaseTransformation().transform(input_keys)
4858
assert input_keys == expected_keys

0 commit comments

Comments
 (0)