Skip to content

Commit

Permalink
Fix for describe column types (#796)
Browse files Browse the repository at this point in the history
  • Loading branch information
benc-db authored Sep 17, 2024
1 parent 39906ad commit 4b88d80
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- Update default scope/redirect Url for OAuth U2M, so with default OAuth app user can run python models ([776](https://github.com/databricks/dbt-databricks/pull/776))
- Fix foreign key constraints by switching from `parent` to `to` and `parent_columns` to `to_columns` ([789](https://github.com/databricks/dbt-databricks/pull/789))
- Now handles external shallow clones without blowing up ([795](https://github.com/databricks/dbt-databricks/pull/795))
- Use information_schema to get column types when possible, since describe extended truncates complex types ([796](https://github.com/databricks/dbt-databricks/pull/796))

## dbt-databricks 1.8.5 (August 6, 2024)

Expand Down
38 changes: 38 additions & 0 deletions dbt/adapters/databricks/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
from dbt.adapters.spark.impl import LIST_SCHEMAS_MACRO_NAME
from dbt.adapters.spark.impl import SparkAdapter
from dbt.adapters.spark.impl import TABLE_OR_VIEW_NOT_FOUND_MESSAGES
from dbt_common.behavior_flags import BehaviorFlag
from dbt_common.exceptions import DbtRuntimeError
from dbt_common.utils import executor
from dbt_common.utils.dict import AttrDict
Expand All @@ -88,6 +89,7 @@
SHOW_TABLES_MACRO_NAME = "show_tables"
SHOW_VIEWS_MACRO_NAME = "show_views"
GET_COLUMNS_COMMENTS_MACRO_NAME = "get_columns_comments"
GET_COLUMNS_BY_INFO_MACRO_NAME = "get_columns_comments_via_information_schema"


@dataclass
Expand Down Expand Up @@ -164,6 +166,12 @@ class DatabricksAdapter(SparkAdapter):
}
)

# NOTE: behavior flags only take effect once dbt-core 1.9 is available;
# until then this declaration is inert.
@property
def _behavior_flags(self) -> List[BehaviorFlag]:
    """Declare the adapter's behavior flags (all default to off)."""
    flag: BehaviorFlag = {  # type: ignore
        "name": "column_types_from_information_schema",
        "default": False,
    }
    return [flag]

# override/overload
def acquire_connection(
self, name: Optional[str] = None, query_header_context: Any = None
Expand Down Expand Up @@ -376,6 +384,36 @@ def parse_describe_extended( # type: ignore[override]

def get_columns_in_relation(  # type: ignore[override]
    self, relation: DatabricksRelation
) -> List[DatabricksColumn]:
    """Return the columns of *relation*.

    Unity Catalog relations are resolved via information_schema because
    `describe extended` truncates complex types; Hive metastore relations
    fall back to the describe-based path.
    """
    # TODO: once behavior flags are available to adapters, also gate this
    # on self.behavior.column_types_from_information_schema.
    if relation.is_hive_metastore():
        return self._get_columns_in_relation_by_describe(relation)
    return self._get_columns_in_relation_by_information_schema(relation)

def _get_columns_in_relation_by_information_schema(
    self, relation: DatabricksRelation
) -> List[DatabricksColumn]:
    """Fetch columns for *relation* from system.information_schema.columns.

    Returns an empty list when the relation is missing (handle_missing_objects
    swallows the not-found error and yields the empty AttrDict fallback).
    """
    rows = list(
        handle_missing_objects(
            lambda: self.execute_macro(
                GET_COLUMNS_BY_INFO_MACRO_NAME, kwargs={"relation": relation}
            ),
            AttrDict(),
        )
    )

    # Each row is (column_name, full_data_type, comment) — see the macro.
    return [
        DatabricksColumn(column=row[0], dtype=row[1], comment=row[2])
        for row in rows
    ]

def _get_columns_in_relation_by_describe(
self, relation: DatabricksRelation
) -> List[DatabricksColumn]:
rows = list(
handle_missing_objects(
Expand Down
16 changes: 16 additions & 0 deletions dbt/include/databricks/macros/adapters/persist_docs.sql
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,22 @@
{% do return(load_result('get_columns_comments').table) %}
{% endmacro %}

{# Look up (name, type, comment) for every column of `relation` from the
   system information_schema, which preserves full complex types that
   `describe extended` would truncate. Ordered by ordinal_position because
   information_schema row order is otherwise undefined and callers expect
   columns in declaration order. #}
{% macro get_columns_comments_via_information_schema(relation) -%}
  {% call statement('get_columns_comments_via_information_schema', fetch_result=True) -%}
    select
      column_name,
      full_data_type,
      comment
    from `system`.`information_schema`.`columns`
    where
      table_catalog = '{{ relation.database|lower }}' and
      table_schema = '{{ relation.schema|lower }}' and
      table_name = '{{ relation.identifier|lower }}'
    order by ordinal_position
  {% endcall %}

  {% do return(load_result('get_columns_comments_via_information_schema').table) %}
{% endmacro %}

{% macro databricks__persist_docs(relation, model, for_relation, for_columns) -%}
{%- if for_relation and config.persist_relation_docs() and model.description %}
{% do alter_table_comment(relation, model) %}
Expand Down
2 changes: 1 addition & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ types-requests
types-mock
pre-commit

dbt-tests-adapter~=1.8.0
dbt-tests-adapter~=1.9.0
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
databricks-sql-connector>=3.1.0, <3.2.0
dbt-spark~=1.8.0
dbt-core>=1.8.0, <2.0
dbt-adapters>=1.3.0, <2.0
dbt-adapters>=1.6.0, <2.0
databricks-sdk==0.17.0
keyring>=23.13.0
pandas<2.2.0
Expand Down
13 changes: 13 additions & 0 deletions tests/functional/adapter/columns/fixtures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
base_model = """
select struct('a', 1, 'b', 'b', 'c', ARRAY(1,2,3)) as struct_col, 'hello' as str_col
"""

schema = """
version: 2
models:
- name: base_model
columns:
- name: struct_col
- name: str_col
"""
55 changes: 55 additions & 0 deletions tests/functional/adapter/columns/test_get_columns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import pytest

from dbt.adapters.databricks.column import DatabricksColumn
from dbt.adapters.databricks.relation import DatabricksRelation
from tests.functional.adapter.columns import fixtures
from dbt.tests import util


class ColumnsInRelation:
    """Shared body for get_columns_in_relation tests.

    Subclasses supply a project_config_update fixture to toggle the
    column_types_from_information_schema behavior flag.
    """

    @pytest.fixture(scope="class")
    def models(self):
        # One model plus its schema.yml, both taken from the fixtures module.
        return {
            "base_model.sql": fixtures.base_model,
            "schema.yml": fixtures.schema,
        }

    @pytest.fixture(scope="class", autouse=True)
    def setup(self, project):
        # Materialize the model once per test class.
        util.run_dbt(["run"])

    @pytest.fixture(scope="class")
    def expected_columns(self):
        # Full (untruncated) struct type as the adapter should report it.
        struct_type = (
            "struct<col1:string,col2:int,col3:string,"
            "col4:string,col5:string,col6:array<int>>"
        )
        return [
            DatabricksColumn(column="struct_col", dtype=struct_type),
            DatabricksColumn(column="str_col", dtype="string"),
        ]

    def test_columns_in_relation(self, project, expected_columns):
        relation = DatabricksRelation.create(
            database=project.database,
            schema=project.test_schema,
            identifier="base_model",
            type=DatabricksRelation.Table,
        )

        with project.adapter.connection_named("_test"):
            actual = project.adapter.get_columns_in_relation(relation)
        assert actual == expected_columns


class TestColumnsInRelationBehaviorFlagOff(ColumnsInRelation):
    """Run the shared column checks with no flags set (flag defaults off)."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        # Empty flags: column_types_from_information_schema stays at its
        # default of False.
        return {"flags": {}}


class TestColumnsInRelationBehaviorFlagOn(ColumnsInRelation):
    """Run the shared column checks with the information_schema flag enabled."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        # Opt in to resolving column types via information_schema.
        return {"flags": {"column_types_from_information_schema": True}}

0 comments on commit 4b88d80

Please sign in to comment.