Skip to content

Commit

Permalink
Update BQ map method (#422)
Browse files Browse the repository at this point in the history
* Update BQ map method

* Include test case

* eof

* eof
  • Loading branch information
ffernandez92 authored Sep 19, 2024
1 parent 95fe7a2 commit 283c7d1
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 30 deletions.
45 changes: 27 additions & 18 deletions datacontract/export/bigquery_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def to_fields_array(fields: Dict[str, Field]) -> List[Dict[str, Field]]:


def to_field(field_name: str, field: Field) -> dict:
bq_type = map_type_to_bigquery(field.type, field_name)
bq_type = map_type_to_bigquery(field)
bq_field = {
"name": field_name,
"type": bq_type,
Expand Down Expand Up @@ -79,44 +79,53 @@ def to_field(field_name: str, field: Field) -> dict:
return bq_field


def map_type_to_bigquery(type_str: str, field_name: str) -> str:
def map_type_to_bigquery(field: Field) -> str:
logger = logging.getLogger(__name__)
if type_str.lower() in ["string", "varchar", "text"]:

field_type = field.type
if not field_type:
return None

if field.config and "bigqueryType" in field.config:
return field.config["bigqueryType"]

if field_type.lower() in ["string", "varchar", "text"]:
return "STRING"
elif type_str == "bytes":
elif field_type.lower() == "bytes":
return "BYTES"
elif type_str.lower() in ["int", "integer"]:
elif field_type.lower() in ["int", "integer"]:
return "INTEGER"
elif type_str.lower() in ["long", "bigint"]:
elif field_type.lower() in ["long", "bigint"]:
return "INT64"
elif type_str == "float":
elif field_type.lower() == "float":
return "FLOAT64"
elif type_str == "boolean":
elif field_type.lower() == "boolean":
return "BOOL"
elif type_str.lower() in ["timestamp", "timestamp_tz"]:
elif field_type.lower() in ["timestamp", "timestamp_tz"]:
return "TIMESTAMP"
elif type_str == "date":
elif field_type.lower() == "date":
return "DATE"
elif type_str == "timestamp_ntz":
elif field_type.lower() == "timestamp_ntz":
return "TIME"
elif type_str.lower() in ["number", "decimal", "numeric"]:
elif field_type.lower() in ["number", "decimal", "numeric"]:
return "NUMERIC"
elif type_str == "double":
elif field_type.lower() == "double":
return "BIGNUMERIC"
elif type_str.lower() in ["object", "record", "array"]:
elif field_type.lower() in ["object", "record", "array"]:
return "RECORD"
elif type_str == "struct":
elif field_type.lower() == "struct":
return "STRUCT"
elif type_str == "null":
elif field_type.lower() == "null":
logger.info(
f"Can't properly map {field_name} to bigquery Schema, as 'null' is not supported as a type. Mapping it to STRING."
f"Can't properly map {field.title} to bigquery Schema, as 'null' \
is not supported as a type. Mapping it to STRING."
)
return "STRING"
else:
raise DataContractException(
type="schema",
result="failed",
name="Map datacontract type to bigquery data type",
reason=f"Unsupported type {type_str} in data contract definition.",
reason=f"Unsupported type {field_type} in data contract definition.",
engine="datacontract",
)
11 changes: 1 addition & 10 deletions datacontract/export/sql_type_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,16 +269,7 @@ def convert_type_to_sqlserver(field: Field) -> None | str:

def convert_type_to_bigquery(field: Field) -> None | str:
"""Convert from supported datacontract types to equivalent bigquery types"""
field_type = field.type
if not field_type:
return None

# If a BigQuery config type is provided, prefer it over the default mapping
if bigquery_type := get_type_config(field, "bigqueryType"):
return bigquery_type

field_type = field_type.lower()
return map_type_to_bigquery(field_type, field.title)
return map_type_to_bigquery(field)


def get_type_config(field: Field, config_attr: str) -> dict[str, str] | None:
Expand Down
8 changes: 7 additions & 1 deletion tests/fixtures/bigquery/export/bq_table_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,13 @@
"scale": null
}
]
},
{
"name": "custom_type_field",
"type": "DATETIME",
"mode": "NULLABLE",
"description": "Change the datacontract type to a BigQuery type."
}
]
}
}
}
8 changes: 7 additions & 1 deletion tests/fixtures/bigquery/export/datacontract.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -180,4 +180,10 @@ models:
Field2:
type: double
required: true
description: a double field
description: a double field
custom_type_field:
type: string
required: false
description: Change the datacontract type to a BigQuery type.
config:
bigqueryType: DATETIME

0 comments on commit 283c7d1

Please sign in to comment.