Skip to content

Commit

Permalink
Adds support for JSON schema arrays, resolves issue #305
Browse files Browse the repository at this point in the history
  • Loading branch information
johannesrave committed Jul 26, 2024
1 parent c74c9aa commit b96f7d4
Show file tree
Hide file tree
Showing 10 changed files with 190 additions and 94 deletions.
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Support data type map in Glue import. (#340)
- Basic html export for new `keys` and `values` fields.
- Added support for arrays in JSON schema import (#305)

### Changed

- Aligned JSON schema import and export of required properties

### Fixed

- Fix required field handling in JSON schema import
- Fix an issue where the quality and definition `$ref` are not always resolved.
- Fix an issue where the JSON schema validation fails for a field with type `string` and format `uuid`

Expand Down Expand Up @@ -329,4 +335,4 @@ The Golang version can be found at [cli-go](https://github.com/datacontract/cli-

## [0.1.1]
### Added
- Initial release.
- Initial release.
5 changes: 1 addition & 4 deletions datacontract/export/jsonschema_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,7 @@ def to_property(field: Field) -> dict:
property = {}
json_type, json_format = convert_type_format(field.type, field.format)
if json_type is not None:
if field.required:
property["type"] = json_type
else:
property["type"] = [json_type, "null"]
property["type"] = json_type
if json_format is not None:
property["format"] = json_format
if field.unique:
Expand Down
145 changes: 82 additions & 63 deletions datacontract/imports/jsonschema_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,73 +10,85 @@
class JsonSchemaImporter(Importer):
def import_source(
self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
) -> dict:
) -> DataContractSpecification:
return import_jsonschema(data_contract_specification, source)


def convert_json_schema_properties(properties, is_definition=False):
def convert_json_schema_properties(properties, required_properties, is_definition=False):
fields = {}
for field_name, field_schema in properties.items():
field_kwargs = {}
field_type = field_schema.get("type")

# Determine if the field is required and set the type to the non-null option if applicable
if isinstance(field_type, list) and "null" in field_type:
field_kwargs["required"] = False
non_null_types = [t for t in field_type if t != "null"]
if non_null_types:
field_type = non_null_types[0]
else:
field_type = None
else:
field_kwargs["required"] = True

# Set the non-null type
if field_type:
field_kwargs["type"] = field_type

for key, value in field_schema.items():
match key:
case "title":
field_kwargs["title"] = value
case "type":
pass # type is already handled above
case "format":
field_kwargs["format"] = value
case "description":
field_kwargs["description"] = value
case "pattern":
field_kwargs["pattern"] = value
case "minLength":
field_kwargs["minLength"] = value
case "maxLength":
field_kwargs["maxLength"] = value
case "minimum":
field_kwargs["minimum"] = value
case "exclusiveMinimum":
field_kwargs["exclusiveMinimum"] = value
case "maximum":
field_kwargs["maximum"] = value
case "exclusiveMaximum":
field_kwargs["exclusiveMaximum"] = value
case "enum":
field_kwargs["enum"] = value
case "tags":
field_kwargs["tags"] = value
case "properties":
field_kwargs["fields"] = convert_json_schema_properties(value, is_definition=is_definition)
case "items":
field_kwargs["items"] = convert_json_schema_properties(value, is_definition=is_definition)

if is_definition:
field = Definition(**field_kwargs)
else:
field = Field(**field_kwargs)
is_required = field_name in required_properties
field = to_field(field_schema, is_definition, is_required)
fields[field_name] = field

return fields


def to_field(field_schema, is_definition, is_required: bool | None = None) -> Definition | Field:
    """Convert a single JSON schema property (or array item schema) to a model object.

    Args:
        field_schema: The JSON schema dict describing the property.
        is_definition: When true, a ``Definition`` is built instead of a ``Field``.
        is_required: Whether the enclosing schema lists this property in its
            ``required`` array. ``None`` leaves ``required`` unset, which is
            what the recursive call for array ``items`` uses.

    Returns:
        A ``Definition`` if ``is_definition`` is true, otherwise a ``Field``.

    Raises:
        DataContractException: If ``items`` is a list that does not contain
            exactly one schema (union/tuple item types are not supported).
    """
    # Keywords that are copied to the field unchanged, under the same name.
    passthrough_keys = (
        "title",
        "format",
        "description",
        "pattern",
        "minLength",
        "maxLength",
        "minimum",
        "exclusiveMinimum",
        "maximum",
        "exclusiveMaximum",
        "enum",
        "tags",
    )

    field_kwargs = {}
    field_type = field_schema.get("type")

    # A type list containing "null" marks the field as optional; the first
    # non-null entry (if any) becomes the field type. This takes precedence
    # over the `required` array of the enclosing schema.
    if isinstance(field_type, list) and "null" in field_type:
        field_kwargs["required"] = False
        non_null_types = [t for t in field_type if t != "null"]
        field_type = non_null_types[0] if non_null_types else None
    elif is_required is not None:
        field_kwargs["required"] = is_required

    if field_type:
        field_kwargs["type"] = field_type

    for key, value in field_schema.items():
        if key in passthrough_keys:
            field_kwargs[key] = value
        elif key == "properties":
            # Nested object: its own `required` array decides which of the
            # nested properties are required. The argument is mandatory in
            # convert_json_schema_properties, so it must be passed here.
            field_kwargs["fields"] = convert_json_schema_properties(
                value, field_schema.get("required", []), is_definition=is_definition
            )
        elif key == "items":
            item_schema = value
            if isinstance(value, list):
                # Only a one-element list can be mapped to a single item type.
                if len(value) != 1:
                    raise DataContractException(
                        type="schema",
                        name="Parse json schema",
                        reason=f"Union types are currently not supported ({value})",
                        engine="datacontract",
                    )
                item_schema = value[0]
            field_kwargs["items"] = to_field(item_schema, is_definition=is_definition, is_required=None)
        # "type" was handled above; any other keyword is ignored.

    if is_definition:
        return Definition(**field_kwargs)
    return Field(**field_kwargs)


def import_jsonschema(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
if data_contract_specification.models is None:
data_contract_specification.models = {}
Expand All @@ -87,18 +99,23 @@ def import_jsonschema(data_contract_specification: DataContractSpecification, so
validator = fastjsonschema.compile({})
validator(json_schema)

description = json_schema.get("description")
type_ = json_schema.get("type")
title = json_schema.get("title")
properties = json_schema.get("properties", {})
required_properties = json_schema.get("required", [])

model = Model(
description=json_schema.get("description"),
type=json_schema.get("type"),
title=json_schema.get("title"),
fields=convert_json_schema_properties(json_schema.get("properties", {})),
description=description,
type=type_,
title=title,
fields=convert_json_schema_properties(properties, required_properties),
)
data_contract_specification.models[json_schema.get("title", "default_model")] = model

if "definitions" in json_schema:
for def_name, def_schema in json_schema["definitions"].items():
definition_kwargs = {}

for key, value in def_schema.items():
match key:
case "domain":
Expand Down Expand Up @@ -134,7 +151,9 @@ def import_jsonschema(data_contract_specification: DataContractSpecification, so
case "tags":
definition_kwargs["tags"] = value
case "properties":
definition_kwargs["fields"] = convert_json_schema_properties(value, is_definition=True)
definition_kwargs["fields"] = convert_json_schema_properties(
value, def_schema.get("required", []), is_definition=True
)

definition = Definition(name=def_name, **definition_kwargs)
data_contract_specification.definitions[def_name] = definition
Expand Down
2 changes: 1 addition & 1 deletion tests/fixtures/import/football-datacontract.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ definitions:
required: true
year_founded:
type: integer
required: true
required: false

15 changes: 10 additions & 5 deletions tests/fixtures/import/orders.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,15 @@
"title": "Order Total",
"description": "Total amount of the order"
},
"line_items": {
"type": "array",
"title": "Line Items",
"items": {
"type" : "integer"
}
},
"customer_id": {
"type": [
"string",
"null"
],
"type": "string",
"minLength": 10,
"maxLength": 20,
"title": "Customer ID",
Expand All @@ -47,7 +51,8 @@
"order_id",
"order_timestamp",
"order_total",
"line_items",
"customer_email_address",
"processed_timestamp"
]
}
}
69 changes: 69 additions & 0 deletions tests/fixtures/import/orders_union-types.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "OrderSchema",
"description": "Schema for order details",
"type": "object",
"properties": {
"order_id": {
"type": "string",
"title": "Order ID",
"description": "Unique identifier for the order"
},
"order_timestamp": {
"type": "string",
"format": "date-time",
"title": "Order Timestamp",
"description": "Timestamp when the order was placed"
},
"order_total": {
"type": "integer",
"title": "Order Total",
"description": "Total amount of the order"
},
"line_items": {
"type": "array",
"title": "Line Items",
"items": {
"type" : "integer"
}
},
"vouchers": {
"type": "array",
"title": "List of used vouchers",
"items": [
{
"type": "integer"
}
]
},
"customer_id": {
"type": [
"string",
"null"
],
"minLength": 10,
"maxLength": 20,
"title": "Customer ID",
"description": "Unique identifier for the customer"
},
"customer_email_address": {
"type": "string",
"format": "email",
"title": "Customer Email Address",
"description": "Email address of the customer"
},
"processed_timestamp": {
"type": "string",
"format": "date-time",
"title": "Processed Timestamp",
"description": "Timestamp when the order was processed"
}
},
"required": [
"order_id",
"order_timestamp",
"order_total",
"customer_email_address",
"processed_timestamp"
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,18 @@ models:
type: integer
required: true
description: Total amount of the order
line_items:
title: Line Items
type: array
required: false
items:
type: integer
vouchers:
title: List of used vouchers
type: array
required: false
items:
type: integer
customer_id:
title: Customer ID
type: string
Expand Down
10 changes: 2 additions & 8 deletions tests/fixtures/local-json/datacontract.json
Original file line number Diff line number Diff line change
Expand Up @@ -73,21 +73,15 @@
"description": "e\nendg\u00fcltiger Wert\n0\nweniger als die H\u00e4lfte von 1 in der letzten besetzten Stelle, jedoch mehr als nichts\n-\nnichts vorhanden\n...\nAngabe f\u00e4llt sp\u00e4ter an\n/\nkeine Angaben, da Zahlenwert nicht sicher genug\n.\nZahlenwert unbekannt oder geheimzuhalten\nx\nTabellenfach gesperrt, weil Aussage nicht sinnvoll\n()\nAussagewert eingeschr\u00e4nkt, da der Zahlenwert statistisch relativ unsicher ist\np\nvorl\u00e4ufige Zahl\nr\nberichtigte Zahl\ns\ngesch\u00e4tzte Zahl\n"
},
"Verbraucherpreisindex__CH0004": {
"type": [
"string",
"null"
],
"type": "string",
"description": "Ver\u00e4nderung zum Vorjahresmonat"
},
"Verbraucherpreisindex__CH0004__q": {
"type": "string",
"description": "e\nendg\u00fcltiger Wert\n0\nweniger als die H\u00e4lfte von 1 in der letzten besetzten Stelle, jedoch mehr als nichts\n-\nnichts vorhanden\n...\nAngabe f\u00e4llt sp\u00e4ter an\n/\nkeine Angaben, da Zahlenwert nicht sicher genug\n.\nZahlenwert unbekannt oder geheimzuhalten\nx\nTabellenfach gesperrt, weil Aussage nicht sinnvoll\n()\nAussagewert eingeschr\u00e4nkt, da der Zahlenwert statistisch relativ unsicher ist\np\nvorl\u00e4ufige Zahl\nr\nberichtigte Zahl\ns\ngesch\u00e4tzte Zahl\n"
},
"PREIS1__CH0005": {
"type": [
"string",
"null"
],
"type": "string",
"description": "Ver\u00e4nderung zum Vormonat"
},
"PREIS1__CH0005__q": {
Expand Down
Loading

0 comments on commit b96f7d4

Please sign in to comment.