diff --git a/datacontract/export/bigquery_converter.py b/datacontract/export/bigquery_converter.py index c69912e7..207e8cc4 100644 --- a/datacontract/export/bigquery_converter.py +++ b/datacontract/export/bigquery_converter.py @@ -44,7 +44,7 @@ def to_fields_array(fields: Dict[str, Field]) -> List[Dict[str, Field]]: def to_field(field_name: str, field: Field) -> dict: - bq_type = map_type_to_bigquery(field.type, field_name) + bq_type = map_type_to_bigquery(field) bq_field = { "name": field_name, "type": bq_type, @@ -79,37 +79,46 @@ def to_field(field_name: str, field: Field) -> dict: return bq_field -def map_type_to_bigquery(type_str: str, field_name: str) -> str: +def map_type_to_bigquery(field: Field) -> str: logger = logging.getLogger(__name__) - if type_str.lower() in ["string", "varchar", "text"]: + + field_type = field.type + if not field_type: + return None + + if field.config and "bigqueryType" in field.config: + return field.config["bigqueryType"] + + if field_type.lower() in ["string", "varchar", "text"]: return "STRING" - elif type_str == "bytes": + elif field_type.lower() == "bytes": return "BYTES" - elif type_str.lower() in ["int", "integer"]: + elif field_type.lower() in ["int", "integer"]: return "INTEGER" - elif type_str.lower() in ["long", "bigint"]: + elif field_type.lower() in ["long", "bigint"]: return "INT64" - elif type_str == "float": + elif field_type.lower() == "float": return "FLOAT64" - elif type_str == "boolean": + elif field_type.lower() == "boolean": return "BOOL" - elif type_str.lower() in ["timestamp", "timestamp_tz"]: + elif field_type.lower() in ["timestamp", "timestamp_tz"]: return "TIMESTAMP" - elif type_str == "date": + elif field_type.lower() == "date": return "DATE" - elif type_str == "timestamp_ntz": + elif field_type.lower() == "timestamp_ntz": return "TIME" - elif type_str.lower() in ["number", "decimal", "numeric"]: + elif field_type.lower() in ["number", "decimal", "numeric"]: return "NUMERIC" - elif type_str == "double": + elif field_type.lower() == "double": return "BIGNUMERIC" - elif type_str.lower() in ["object", "record", "array"]: + elif field_type.lower() in ["object", "record", "array"]: return "RECORD" - elif type_str == "struct": + elif field_type.lower() == "struct": return "STRUCT" - elif type_str == "null": + elif field_type.lower() == "null": logger.info( - f"Can't properly map {field_name} to bigquery Schema, as 'null' is not supported as a type. Mapping it to STRING." + f"Can't properly map {field.title} to bigquery Schema, as 'null' \ + is not supported as a type. Mapping it to STRING." ) return "STRING" else: @@ -117,6 +126,6 @@ def map_type_to_bigquery(type_str: str, field_name: str) -> str: type="schema", result="failed", name="Map datacontract type to bigquery data type", - reason=f"Unsupported type {type_str} in data contract definition.", + reason=f"Unsupported type {field_type} in data contract definition.", engine="datacontract", ) diff --git a/datacontract/export/sql_type_converter.py b/datacontract/export/sql_type_converter.py index 9dcd8fe6..87ca522e 100644 --- a/datacontract/export/sql_type_converter.py +++ b/datacontract/export/sql_type_converter.py @@ -269,16 +269,7 @@ def convert_type_to_sqlserver(field: Field) -> None | str: def convert_type_to_bigquery(field: Field) -> None | str: """Convert from supported datacontract types to equivalent bigquery types""" - field_type = field.type - if not field_type: - return None - - # If provided sql-server config type, prefer it over default mapping - if bigquery_type := get_type_config(field, "bigqueryType"): - return bigquery_type - - field_type = field_type.lower() - return map_type_to_bigquery(field_type, field.title) + return map_type_to_bigquery(field) def get_type_config(field: Field, config_attr: str) -> dict[str, str] | None: diff --git a/tests/fixtures/bigquery/export/bq_table_schema.json b/tests/fixtures/bigquery/export/bq_table_schema.json index 4aafc69a..e9d63671 100644 --- a/tests/fixtures/bigquery/export/bq_table_schema.json +++ b/tests/fixtures/bigquery/export/bq_table_schema.json @@ -267,7 +267,13 @@ "scale": null } ] + }, + { + "name": "custom_type_field", + "type": "DATETIME", + "mode": "NULLABLE", + "description": "Change the datacontract type to a BigQuery type." } ] } -} \ No newline at end of file +} diff --git a/tests/fixtures/bigquery/export/datacontract.yaml b/tests/fixtures/bigquery/export/datacontract.yaml index a03a6359..db64e931 100644 --- a/tests/fixtures/bigquery/export/datacontract.yaml +++ b/tests/fixtures/bigquery/export/datacontract.yaml @@ -180,4 +180,10 @@ models: Field2: type: double required: true - description: a double field \ No newline at end of file + description: a double field + custom_type_field: + type: string + required: false + description: Change the datacontract type to a BigQuery type. + config: + bigqueryType: DATETIME