Skip to content

Commit

Permalink
refactor: Object-oriented JSON-schema loading
Browse files Browse the repository at this point in the history
  • Loading branch information
yehorb authored Dec 4, 2021
1 parent d0d101a commit de7de13
Show file tree
Hide file tree
Showing 30 changed files with 776 additions and 328 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ repos:
additional_dependencies:
- "types-pytz"
- "types-requests"
- "types-setuptools"
- repo: https://github.com/PyCQA/flake8
rev: 4.0.1
hooks:
Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ name = tap-apple-search-ads
version = 0.0

[options]
python_requires = >= 3.8
packages = find:
package_dir =
=src
Expand All @@ -24,6 +25,7 @@ console_scripts =

[options.package_data]
tap_apple_search_ads = py.typed
* = *.json

[options.extras_require]
dev =
Expand Down
73 changes: 10 additions & 63 deletions src/tap_apple_search_ads/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@
import time
from typing import Any, Dict, List, Mapping, MutableMapping, Optional, Tuple

import pkg_resources
import pytz
import singer
from singer import metadata
from singer.transform import RefResolver
from singer.transform import _resolve_schema_references as s_rsr

from tap_apple_search_ads import config as tap_config
from tap_apple_search_ads.api import auth, campaign, campaign_level_reports
from tap_apple_search_ads.api.auth import client_secret
from tap_apple_search_ads.schema.from_file import api as schema

logger = singer.get_logger()

Expand Down Expand Up @@ -51,16 +51,13 @@ def do_discover() -> int:
result: Dict[str, List[Dict[str, Any]]] = {"streams": []}

for stream in STREAMS:
schema = load_schema(stream)
definitions = load_definitions()
schema = resolve_schema_references(schema, definitions)
schema.pop("definitions", None)
stream_schema = load_schema(stream)

result["streams"].append(
{
"stream": stream,
"tap_stream_id": stream,
"schema": schema,
"schema": stream_schema,
"metadata": [
{
"metadata": {
Expand All @@ -78,65 +75,15 @@ def do_discover() -> int:


def load_schema(stream_name: str) -> Dict[str, Any]:
path = (
pathlib.Path(__file__).parent / "schemas" / "{}.json".format(stream_name)
).absolute()
schemas_directory = pkg_resources.resource_filename(__name__, "schemas")

with open(path, "r") as stream:
schema = json.load(stream)
loader = schema.Loader(schemas_directory)
resolver = schema.Resolver(loader)
facade = schema.Facade(resolver)

return schema
schema_loader = getattr(facade, stream_name)


def load_definitions() -> Dict[str, Dict[str, Any]]:
schemas_path = pathlib.Path(__file__).parent / "schemas"
path = schemas_path / "definitions"

intermediate_definitions = {}

for definition_file in path.iterdir():
if not definition_file.is_file():
continue

with open(definition_file, "r") as stream:
schema = json.load(stream)

key = definition_file.relative_to(schemas_path).as_posix()
intermediate_definitions[key] = schema

key = definition_file.name
intermediate_definitions[key] = schema

definitions = {}

for key, schema in intermediate_definitions.items():
schema = singer.resolve_schema_references(schema, intermediate_definitions)
schema.pop("definitions", None)
definitions[key] = schema

return definitions


def resolve_schema_references(
schema: Dict[str, Any], refs: Optional[Dict[str, Dict[str, Any]]] = None
) -> Dict[str, Any]:
"""resolve_schema_references is a re-implementation of the same function from
singer.transform. It allows resolution of "allOf" schema element. "allOf" element
is missing from provided implementation for reasons unknown.
"""

refs = refs or {}
return _resolve_schema_references(schema, RefResolver("", schema, store=refs))


def _resolve_schema_references(
schema: Dict[str, Any], resolver: RefResolver
) -> Dict[str, Any]:
if "allOf" in schema:
for i, element in enumerate(schema["allOf"]):
schema["allOf"][i] = _resolve_schema_references(element, resolver)

return s_rsr(schema, resolver)
return schema_loader()


def do_sync(config: Dict[str, Any], catalog: singer.Catalog):
Expand Down
3 changes: 3 additions & 0 deletions src/tap_apple_search_ads/schema/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""Working with JSON schemas."""

from . import from_file as from_file # noqa
16 changes: 16 additions & 0 deletions src/tap_apple_search_ads/schema/from_file/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Loading Schema from JSON files."""

from abc import ABC, abstractmethod
from typing import Any, Dict

# A schema is handled as a plain dictionary with string keys.
Schema = Dict[str, Any]


class SchemaCollection(ABC):
    """Interface for objects that hand out schemas looked up by name."""

    @abstractmethod
    def get_schema_by_name(self, name: str) -> Schema:
        """Return the single schema registered under ``name``."""

    @abstractmethod
    def get_schemas(self) -> Dict[str, Schema]:
        """Return every schema of the collection, keyed by name."""
5 changes: 5 additions & 0 deletions src/tap_apple_search_ads/schema/from_file/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from . import Schema as Schema # noqa
from .facade import Facade as Facade # noqa
from .loader import Loader as Loader # noqa
from .loader import LoaderError as LoaderError # noqa
from .resolver import Resolver as Resolver # noqa
25 changes: 25 additions & 0 deletions src/tap_apple_search_ads/schema/from_file/facade.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from dataclasses import dataclass

from . import Schema, SchemaCollection


@dataclass
class Facade:
    """Named accessors for schemas held by a schema collection.

    Every public method returns the schema that the wrapped collection
    stores under one fixed JSON file name.
    """

    # Collection the schemas are fetched from.
    collection: SchemaCollection

    def _fetch(self, file_name: str) -> Schema:
        """Ask the wrapped collection for the schema named ``file_name``."""
        return self.collection.get_schema_by_name(file_name)

    def campaign(self) -> Schema:
        """Return the schema stored as ``Campaign.json``."""
        return self._fetch("Campaign.json")

    def campaign_flat(self) -> Schema:
        """Return the schema stored as ``Campaign_Flat.json``."""
        return self._fetch("Campaign_Flat.json")

    def campaign_level_reports(self) -> Schema:
        """Return the schema stored as ``Row.json``."""
        return self._fetch("Row.json")

    def campaign_level_reports_extended_spend_row(self) -> Schema:
        """Return the schema stored as ``ExtendedSpendRow_campaignId.json``."""
        return self._fetch("ExtendedSpendRow_campaignId.json")

    def campaign_level_reports_extended_spend_row_flat(self) -> Schema:
        """Return the schema stored as ``ExtendedSpendRow_campaignId_Flat.json``."""
        return self._fetch("ExtendedSpendRow_campaignId_Flat.json")
65 changes: 65 additions & 0 deletions src/tap_apple_search_ads/schema/from_file/loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import json
from dataclasses import InitVar, dataclass, field
from pathlib import Path
from typing import Dict, Union

from . import Schema, SchemaCollection

JSON = [".json"]


@dataclass
class Loader(SchemaCollection):
    """Schema collection backed by the JSON files of one directory.

    Nothing is read at construction time; the directory is loaded on the
    first access to ``schemas`` and the result is kept for later lookups.
    """

    # Directory to load from; assigned in __post_init__, not an __init__ argument.
    path: Path = field(init=False)
    # Loaded schemas; stays empty until ``schemas`` is first accessed.
    _schemas: Dict[str, Schema] = field(init=False, default_factory=dict)

    # Constructor-only argument: the schemas directory as ``str`` or ``Path``.
    schemas_directory: InitVar[Union[str, Path]]

    def __post_init__(self, schemas_directory: Union[str, Path]) -> None:
        """Store the directory on ``path``, converting a string to ``Path``."""
        self.path = (
            Path(schemas_directory)
            if isinstance(schemas_directory, str)
            else schemas_directory
        )

    @property
    def schemas(self) -> Dict[str, Schema]:
        """Schemas loaded from ``path``; read from disk while none are cached."""
        if self._schemas:
            return self._schemas

        self._schemas = load_json_files(self.path)
        return self._schemas

    def get_schema_by_name(self, name: str) -> Schema:
        """Return the schema keyed by ``name``; raises ``KeyError`` if absent."""
        return self.schemas[name]

    def get_schemas(self) -> Dict[str, Schema]:
        """Return the mapping of all loaded schemas."""
        return self.schemas


def load_json_files(directory: Path) -> Dict[str, Schema]:
    """Parse every JSON file located directly inside ``directory``.

    Sub-directories and files whose suffix is not listed in ``JSON`` are
    skipped; the search is not recursive.

    Args:
        directory: Directory to scan.

    Returns:
        Mapping of file name (including the suffix) to the parsed content.

    Raises:
        LoaderError: If ``directory`` does not exist, is not a directory, or
            yields no JSON file at all.
    """
    if not directory.exists():
        raise LoaderError("path {} does not exist".format(directory))

    if not directory.is_dir():
        raise LoaderError("path {} is not a directory".format(directory))

    json_files: Dict[str, Schema] = {}
    # iterdir() yields entries in arbitrary order; sorting keeps the key
    # order of the result identical across platforms and runs.
    for file in sorted(directory.iterdir()):
        if not file.is_file() or file.suffix not in JSON:
            continue

        # Read as UTF-8 explicitly instead of relying on the locale's
        # default encoding, which differs between systems.
        with open(file, encoding="utf-8") as stream:
            json_files[file.name] = json.load(stream)

    if not json_files:
        raise LoaderError(
            "directory {} does not contain any JSON files".format(directory)
        )

    return json_files


class LoaderError(Exception):
    """Schema loading failed"""
57 changes: 57 additions & 0 deletions src/tap_apple_search_ads/schema/from_file/resolver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from dataclasses import dataclass
from typing import Any, Dict, Optional

from singer.transform import RefResolver
from singer.transform import (
_resolve_schema_references as singer_resolve_schema_references,
)

from . import Schema, SchemaCollection

DEFS = "$defs"


@dataclass
class Resolver(SchemaCollection):
    """Schema collection that resolves references in another collection's schemas.

    Schemas come from ``collection``; each one is passed through
    ``resolve_schema_references`` with all of the collection's schemas as the
    reference store, and its ``$defs`` entry is removed afterwards.
    """

    # Collection providing the unresolved schemas.
    collection: SchemaCollection

    @property
    def parent_schemas(self) -> Dict[str, Schema]:
        """All schemas of the wrapped collection, as returned by it."""
        return self.collection.get_schemas()

    def resolve(self, schema: Schema) -> Schema:
        """Resolve ``schema`` against the parent schemas and drop its ``$defs``.

        NOTE(review): ``$defs`` is popped from the object returned by the
        resolution step; whether that is the very object passed in (and hence
        the wrapped collection's copy) depends on singer - confirm.
        """
        resolved = resolve_schema_references(schema, self.parent_schemas)
        resolved.pop(DEFS, None)
        return resolved

    def get_schema_by_name(self, name: str) -> Schema:
        """Fetch the schema called ``name`` from the collection and resolve it."""
        return self.resolve(self.collection.get_schema_by_name(name))

    def get_schemas(self) -> Dict[str, Schema]:
        """Return every parent schema in resolved form, keyed as in the parent."""
        return {
            key: self.resolve(schema) for key, schema in self.parent_schemas.items()
        }


def resolve_schema_references(
    schema: Schema, refs: Optional[Dict[str, Schema]] = None
) -> Dict[str, Any]:
    """Resolve the references of ``schema``, including those under "allOf".

    Re-implements the function of the same name from ``singer.transform``,
    whose provided implementation does not handle the "allOf" schema element.

    Args:
        schema: Schema to resolve; also used as the resolver's referrer.
        refs: Schemas available as reference targets. ``None`` or an empty
            mapping means an empty store.

    Returns:
        The result of the resolution.
    """
    store = refs if refs else {}
    resolver = RefResolver("", schema, store=store)
    return _resolve_schema_references(schema, resolver)


def _resolve_schema_references(schema: Schema, resolver: RefResolver) -> Schema:
    """Resolve "allOf" members of ``schema``, then delegate to singer.

    Only an "allOf" list found directly on ``schema`` (and, recursively, on
    its members) is walked here; each member is replaced in the list by its
    resolved form. Everything else is left to singer's implementation.
    """
    if "allOf" in schema:
        members = schema["allOf"]
        for position, member in enumerate(members):
            members[position] = _resolve_schema_references(member, resolver)

    return singer_resolve_schema_references(schema, resolver)
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "https://developer.apple.com/documentation/apple_search_ads/campaign",
"type": "object",
"required": [
"adamId",
Expand All @@ -20,7 +21,7 @@
"enum": ["IMPRESSIONS", "TAPS"]
},
"budgetAmount": {
"$ref": "definitions/Money.json"
"$ref": "Money.json"
},
"budgetOrders": {
"type": "array",
Expand All @@ -35,10 +36,10 @@
}
},
"countryOrRegionServingStateReasons": {
"$ref": "definitions/Campaign.CountryOrRegionServingStateReasons.json"
"$ref": "Campaign.CountryOrRegionServingStateReasons.json"
},
"dailyBudgetAmount": {
"$ref": "definitions/Money.json"
"$ref": "Money.json"
},
"deleted": {
"type": "boolean",
Expand All @@ -55,7 +56,7 @@
"type": "integer"
},
"locInvoiceDetails": {
"$ref": "definitions/LOCInvoiceDetails.json"
"$ref": "#/$defs/LOCInvoiceDetails"
},
"modificationTime": {
"type": "string",
Expand Down Expand Up @@ -122,5 +123,27 @@
"enum": ["APPSTORE_SEARCH_RESULTS", "APPSTORE_SEARCH_TAB"]
}
}
},
"$defs": {
"LOCInvoiceDetails": {
"type": ["null", "object"],
"properties": {
"billingContactEmail": {
"type": "string"
},
"buyerEmail": {
"type": "string"
},
"buyerName": {
"type": "string"
},
"clientName": {
"type": "string"
},
"orderNumber": {
"type": "string"
}
}
}
}
}
Loading

0 comments on commit de7de13

Please sign in to comment.