Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Add column types to json output #638

Merged
merged 5 commits into from
Jul 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion data_diff/dbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,13 +308,23 @@ def _local_diff(diff_vars: TDiffVars, json_output: bool = False) -> None:
)
return

dataset1_columns = [
(name, type_, table1.database.dialect.parse_type(table1.table_path, name, type_, *other))
for (name, type_, *other) in table1_columns.values()
]
dataset2_columns = [
(name, type_, table2.database.dialect.parse_type(table2.table_path, name, type_, *other))
for (name, type_, *other) in table2_columns.values()
]
print(
json.dumps(
jsonify(
diff,
dbt_model=diff_vars.dbt_model,
dataset1_columns=dataset1_columns,
dataset2_columns=dataset2_columns,
with_summary=True,
with_columns={
columns_diff={
"added": columns_added,
"removed": columns_removed,
"changed": columns_type_changed,
Expand Down
86 changes: 77 additions & 9 deletions data_diff/format.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,23 @@
import collections
from typing import Any, Optional, List, Dict, Tuple
from enum import Enum
from typing import Any, Optional, List, Dict, Tuple, Type

from runtype import dataclass
from data_diff.diff_tables import DiffResultWrapper
from data_diff.sqeleton.abcs.database_types import (
JSON,
Boolean,
ColType,
Array,
ColType_UUID,
Date,
FractionalType,
NumericType,
Struct,
TemporalType,
ColType_Alphanum,
String_Alphanum,
)


def jsonify_error(table1: List[str], table2: List[str], dbt_model: str, error: str) -> "FailedDiff":
Expand All @@ -15,11 +30,16 @@ def jsonify_error(table1: List[str], table2: List[str], dbt_model: str, error: s
).json()


# One entry per column: (column name, raw type string, parsed ColType instance).
Columns = List[Tuple[str, str, ColType]]


def jsonify(
diff: DiffResultWrapper,
dbt_model: str,
dataset1_columns: Columns,
dataset2_columns: Columns,
columns_diff: Dict[str, List[str]],
with_summary: bool = False,
with_columns: Optional[Dict[str, List[str]]] = None,
) -> "JsonDiff":
"""
Converts the diff result into a JSON-serializable format.
Expand Down Expand Up @@ -53,16 +73,13 @@ def jsonify(
if with_summary:
summary = _jsonify_diff_summary(diff.get_stats_dict(is_dbt=True))

columns = None
if with_columns:
columns = _jsonify_columns_diff(with_columns, list(key_columns))
columns = _jsonify_columns_diff(dataset1_columns, dataset2_columns, columns_diff, list(key_columns))

is_different = bool(
t1_exclusive_rows
or t2_exclusive_rows
or diff_rows
or with_columns
and (with_columns["added"] or with_columns["removed"] or with_columns["changed"])
or (columns_diff["added"] or columns_diff["removed"] or columns_diff["changed"])
)
return JsonDiff(
status="success",
Expand Down Expand Up @@ -138,8 +155,44 @@ class ExclusiveColumns:
dataset2: List[str]


class ColumnKind(Enum):
    """Coarse column categories reported in the JSON output.

    The member *value* is the string that ends up in the ``kind`` field of
    each serialized column entry.
    """

    INTEGER = "integer"
    FLOAT = "float"
    STRING = "string"
    DATE = "date"
    TIME = "time"
    DATETIME = "datetime"
    BOOL = "boolean"
    # Fallback used when a column type matches none of the specific kinds.
    UNSUPPORTED = "unsupported"


# Ordered lookup table from column-type classes to the JSON ``ColumnKind``.
# It is scanned top to bottom with ``isinstance`` and the first matching entry
# wins, so the order of the entries is significant.
# NOTE(review): presumably the narrower types (e.g. Date, FractionalType) are
# listed before the broader ones (TemporalType, NumericType) because they are
# subclasses, and the final ColType entry acts as a catch-all — confirm against
# data_diff.sqeleton.abcs.database_types.
KIND_MAPPING: List[Tuple[Type[ColType], ColumnKind]] = [
    (Boolean, ColumnKind.BOOL),
    (Date, ColumnKind.DATE),
    (TemporalType, ColumnKind.DATETIME),
    (FractionalType, ColumnKind.FLOAT),
    (NumericType, ColumnKind.INTEGER),
    (ColType_UUID, ColumnKind.STRING),
    (ColType_Alphanum, ColumnKind.STRING),
    (String_Alphanum, ColumnKind.STRING),
    (JSON, ColumnKind.STRING),
    (Array, ColumnKind.STRING),
    (Struct, ColumnKind.STRING),
    (ColType, ColumnKind.UNSUPPORTED),
]


@dataclass
class Column:
    """One column entry of a dataset as it appears in the JSON columns summary."""

    name: str  # column name
    type: str  # raw type string as supplied by the caller
    kind: str  # string value of the matching ColumnKind member


@dataclass
class JsonColumnsSummary:
dataset1: List[Column]
dataset2: List[Column]
primaryKey: List[str]
exclusive: ExclusiveColumns
typeChanged: List[str]
Expand Down Expand Up @@ -179,7 +232,7 @@ class JsonDiff:
summary: Optional[JsonDiffSummary]
columns: Optional[JsonColumnsSummary]

version: str = "1.0.0"
version: str = "1.1.0"


def _group_rows(
Expand Down Expand Up @@ -262,12 +315,27 @@ def _jsonify_diff_summary(stats_dict: dict) -> JsonDiffSummary:
)


def _jsonify_columns_diff(columns_diff: Dict[str, List[str]], key_columns: List[str]) -> JsonColumnsSummary:
def _jsonify_columns_diff(
    dataset1_columns: Columns, dataset2_columns: Columns, columns_diff: Dict[str, List[str]], key_columns: List[str]
) -> JsonColumnsSummary:
    """Build the ``columns`` section of the JSON diff.

    Args:
        dataset1_columns: ``(name, raw type, parsed type)`` triples for dataset 1.
        dataset2_columns: ``(name, raw type, parsed type)`` triples for dataset 2.
        columns_diff: Column-name lists under the keys ``"added"``,
            ``"removed"`` and ``"changed"``; a missing key is treated as an
            empty list.
        key_columns: Names of the primary-key columns, passed through as-is.

    Returns:
        A ``JsonColumnsSummary`` describing both datasets' columns, the
        columns exclusive to each side, and the columns whose type changed.
    """

    def _describe(columns: Columns) -> List[Column]:
        # Convert each triple into a Column, mapping the parsed type to its kind string.
        return [
            Column(name=col_name, type=raw_type, kind=_map_kind(parsed_type).value)
            for col_name, raw_type, parsed_type in columns
        ]

    first_side = _describe(dataset1_columns)
    second_side = _describe(dataset2_columns)

    # "added" columns exist only in dataset 2; "removed" columns only in dataset 1.
    exclusive_columns = ExclusiveColumns(
        dataset2=list(columns_diff.get("added", [])),
        dataset1=list(columns_diff.get("removed", [])),
    )

    return JsonColumnsSummary(
        dataset1=first_side,
        dataset2=second_side,
        primaryKey=key_columns,
        exclusive=exclusive_columns,
        typeChanged=list(columns_diff.get("changed", [])),
    )


def _map_kind(kind: ColType) -> ColumnKind:
    """Translate a parsed column type into the ``ColumnKind`` used in JSON.

    ``KIND_MAPPING`` is scanned in order and the kind paired with the first
    class that ``kind`` is an instance of is returned. If no entry matches,
    ``ColumnKind.UNSUPPORTED`` is returned.
    """
    candidates = (json_kind for raw_kind, json_kind in KIND_MAPPING if isinstance(kind, raw_kind))
    # The generator is lazy, so evaluation stops at the first match just like an early return.
    return next(candidates, ColumnKind.UNSUPPORTED)
113 changes: 104 additions & 9 deletions tests/test_format.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest
from data_diff.diff_tables import DiffResultWrapper, InfoTree, SegmentInfo, TableSegment
from data_diff.format import jsonify
from data_diff.sqeleton.abcs.database_types import Integer
from data_diff.sqeleton.databases import Database


Expand Down Expand Up @@ -35,11 +36,28 @@ def test_jsonify_diff(self):
diff=[],
stats={},
)
json_diff = jsonify(diff, dbt_model="my_model")
json_diff = jsonify(
diff,
dbt_model="my_model",
dataset1_columns=[
("id", "NUMBER", Integer()),
("value", "NUMBER", Integer()),
],
dataset2_columns=[
("id", "NUMBER", Integer()),
("value", "NUMBER", Integer()),
],
columns_diff={
"added": [],
"removed": [],
"typeChanged": [],
},
)

self.assertEqual(
json_diff,
{
"version": "1.0.0",
"version": "1.1.0",
"status": "success",
"result": "different",
"model": "my_model",
Expand All @@ -57,8 +75,23 @@ def test_jsonify_diff(self):
},
],
},
"columns": {
"dataset1": [
{"name": "id", "type": "NUMBER", "kind": "integer"},
{"name": "value", "type": "NUMBER", "kind": "integer"},
],
"dataset2": [
{"name": "id", "type": "NUMBER", "kind": "integer"},
{"name": "value", "type": "NUMBER", "kind": "integer"},
],
"primaryKey": ["id"],
"exclusive": {
"dataset1": [],
"dataset2": [],
},
"typeChanged": [],
},
"summary": None,
"columns": None,
},
)

Expand Down Expand Up @@ -86,11 +119,27 @@ def test_jsonify_diff_no_difeference(self):
diff=[],
stats={},
)
json_diff = jsonify(diff, dbt_model="model")
json_diff = jsonify(
diff,
dbt_model="model",
dataset1_columns=[
("id", "NUMBER", Integer()),
("value", "NUMBER", Integer()),
],
dataset2_columns=[
("id", "NUMBER", Integer()),
("value", "NUMBER", Integer()),
],
columns_diff={
"added": [],
"removed": [],
"changed": [],
},
)
self.assertEqual(
json_diff,
{
"version": "1.0.0",
"version": "1.1.0",
"status": "success",
"result": "identical",
"model": "model",
Expand All @@ -100,8 +149,23 @@ def test_jsonify_diff_no_difeference(self):
"exclusive": {"dataset1": [], "dataset2": []},
"diff": [],
},
"columns": {
"primaryKey": ["id"],
"dataset1": [
{"name": "id", "type": "NUMBER", "kind": "integer"},
{"name": "value", "type": "NUMBER", "kind": "integer"},
],
"dataset2": [
{"name": "id", "type": "NUMBER", "kind": "integer"},
{"name": "value", "type": "NUMBER", "kind": "integer"},
],
"exclusive": {
"dataset1": [],
"dataset2": [],
},
"typeChanged": [],
},
"summary": None,
"columns": None,
},
)

Expand Down Expand Up @@ -133,11 +197,27 @@ def test_jsonify_column_suffix_fix(self):
diff=[],
stats={},
)
json_diff = jsonify(diff, dbt_model="my_model")
json_diff = jsonify(
diff,
dbt_model="my_model",
dataset1_columns=[
("id_a", "NUMBER", Integer()),
("value_b", "NUMBER", Integer()),
],
dataset2_columns=[
("id_a", "NUMBER", Integer()),
("value_b", "NUMBER", Integer()),
],
columns_diff={
"added": [],
"removed": [],
"typeChanged": [],
},
)
self.assertEqual(
json_diff,
{
"version": "1.0.0",
"version": "1.1.0",
"status": "success",
"result": "different",
"model": "my_model",
Expand All @@ -158,6 +238,21 @@ def test_jsonify_column_suffix_fix(self):
],
},
"summary": None,
"columns": None,
"columns": {
"dataset1": [
{"name": "id_a", "type": "NUMBER", "kind": "integer"},
{"name": "value_b", "type": "NUMBER", "kind": "integer"},
],
"dataset2": [
{"name": "id_a", "type": "NUMBER", "kind": "integer"},
{"name": "value_b", "type": "NUMBER", "kind": "integer"},
],
"primaryKey": ["id_a"],
"exclusive": {
"dataset1": [],
"dataset2": [],
},
"typeChanged": [],
},
},
)