datafold · teraamp · Jan 15, 2024 · Jan 14, 2024 · Jan 14, 2024 · Jan 15, 2024
diff --git a/data_diff/abcs/database_types.py b/data_diff/abcs/database_types.py
@@ -156,6 +156,11 @@ class Date(TemporalType):
     pass
 
 
+@attrs.define(frozen=True)
+class Time(TemporalType):
+    pass  # Marker subclass: declares no members of its own; the dialect type maps in this patch bind SQL TIME column types to it.
+
+
 @attrs.define(frozen=True)
 class NumericType(ColType):
     # 'precision' signifies how many fractional digits (after the dot) we want to compare

diff --git a/data_diff/databases/bigquery.py b/data_diff/databases/bigquery.py
@@ -19,6 +19,8 @@
     TemporalType,
     Boolean,
     UnknownColType,
+    Time,
+    Date,
 )
 from data_diff.databases.base import (
     BaseDialect,
@@ -63,6 +65,8 @@ class Dialect(BaseDialect):
         # Dates
         "TIMESTAMP": Timestamp,
         "DATETIME": Datetime,
+        "DATE": Date,
+        "TIME": Time,
         # Numbers
         "INT64": Integer,
         "INT32": Integer,
@@ -160,6 +164,21 @@ def md5_as_hex(self, s: str) -> str:
         return f"md5({s})"
 
     def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
+        try:
+            is_date = coltype.is_date
+            is_time = coltype.is_time
+        except:
+            is_date = False
+            is_time = False
+        if isinstance(coltype, Date) or is_date:
+            return f"FORMAT_DATE('%F', {value})"
+        if isinstance(coltype, Time) or is_time:
+            microseconds = f"TIME_DIFF( {value}, cast('00:00:00' as time), microsecond)"
+            rounded = f"ROUND({microseconds}, -6 + {coltype.precision})"
+            time_value = f"TIME_ADD(cast('00:00:00' as time), interval cast({rounded} as int64) microsecond)"
+            converted = f"FORMAT_TIME('%H:%M:%E6S', {time_value})"
+            return converted
+
         if coltype.rounds:
             timestamp = f"timestamp_micros(cast(round(unix_micros(cast({value} as timestamp))/1000000, {coltype.precision})*1000000 as int))"
             return f"FORMAT_TIMESTAMP('%F %H:%M:%E6S', {timestamp})"

diff --git a/data_diff/databases/databricks.py b/data_diff/databases/databricks.py
@@ -92,7 +92,12 @@ def md5_as_hex(self, s: str) -> str:
 
     def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
         """Databricks timestamp contains no more than 6 digits in precision"""
-
+        try:
+            is_date = coltype.is_date
+        except:
+            is_date = False
+        if isinstance(coltype, Date) or is_date:
+            return f"date_format({value}, 'yyyy-MM-dd')"
         if coltype.rounds:
             # cast to timestamp due to unix_micros() requiring timestamp
             timestamp = f"cast(round(unix_micros(cast({value} as timestamp)) / 1000000, {coltype.precision}) * 1000000 as bigint)"

diff --git a/data_diff/databases/mssql.py b/data_diff/databases/mssql.py
@@ -13,7 +13,6 @@
 )
 from data_diff.abcs.database_types import (
     JSON,
-    Date,
     NumericType,
     Timestamp,
     TimestampTZ,
@@ -25,6 +24,8 @@
     Native_UUID,
     Text,
     Boolean,
+    Date,
+    Time
 )
 
 
@@ -48,6 +49,7 @@ class Dialect(BaseDialect):
         "datetime2": Timestamp,
         "smalldatetime": Timestamp,
         "date": Date,
+        "time": Time,
         # Numbers
         "float": Float,
         "real": Float,

diff --git a/data_diff/databases/postgresql.py b/data_diff/databases/postgresql.py
@@ -17,6 +17,7 @@
     FractionalType,
     Boolean,
     Date,
+    Time
 )
 from data_diff.databases.base import BaseDialect, ThreadedDatabase, import_helper, ConnectError
 from data_diff.databases.base import (
@@ -57,6 +58,8 @@ class PostgresqlDialect(BaseDialect):
         "timestamp without time zone": Timestamp,
         "timestamp": Timestamp,
         "date": Date,
+        "time with time zone": Time,
+        "time without time zone": Time,
         # Numbers
         "double precision": Float,
         "real": Float,
@@ -111,6 +114,23 @@ def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
         def _add_padding(coltype: TemporalType, timestamp6: str):
             return f"RPAD(LEFT({timestamp6}, {TIMESTAMP_PRECISION_POS+coltype.precision}), {TIMESTAMP_PRECISION_POS+6}, '0')"
 
+        try:
+            is_date = coltype.is_date
+            is_time = coltype.is_time
+        except:
+            is_date = False
+            is_time = False
+
+        if isinstance(coltype, Date) or is_date:
+            return f"cast({value} as varchar)"
+
+        if isinstance(coltype, Time) or is_time:
+            seconds = f"EXTRACT( epoch from {value})"
+            rounded = f"ROUND({seconds},  {coltype.precision})"
+            time_value = f"CAST('00:00:00' as time) + make_interval(0, 0, 0, 0, 0, 0, {rounded})"  # 7th arg = seconds
+            converted = f"to_char({time_value}, 'hh24:mi:ss.ff6')"
+            return converted
+
         if coltype.rounds:
             # NULL value expected to return NULL after normalization
             null_case_begin = f"CASE WHEN {value} IS NULL THEN NULL ELSE "

diff --git a/data_diff/databases/snowflake.py b/data_diff/databases/snowflake.py
@@ -15,6 +15,7 @@
     DbPath,
     Boolean,
     Date,
+    Time,
 )
 from data_diff.databases.base import (
     BaseDialect,
@@ -45,6 +46,7 @@ class Dialect(BaseDialect):
         "TIMESTAMP_LTZ": Timestamp,
         "TIMESTAMP_TZ": TimestampTZ,
         "DATE": Date,
+        "TIME": Time,
         # Numbers
         "NUMBER": Decimal,
         "FLOAT": Float,
@@ -81,6 +83,21 @@ def md5_as_hex(self, s: str) -> str:
         return f"md5({s})"
 
     def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
+        try:
+            is_date = coltype.is_date
+            is_time = coltype.is_time
+        except:
+            is_date = False
+            is_time = False
+        if isinstance(coltype, Date) or is_date:
+            return f"({value}::varchar)"
+        elif isinstance(coltype, Time) or is_time:
+            microseconds = f"TIMEDIFF(microsecond, cast('00:00:00' as time), {value})"
+            rounded = f"round({microseconds}, -6 + {coltype.precision})"
+            time_value = f"TIMEADD(microsecond, {rounded}, cast('00:00:00' as time))"
+            converted = f"TO_VARCHAR({time_value}, 'HH24:MI:SS.FF6')"
+            return converted
+
         if coltype.rounds:
             timestamp = f"to_timestamp(round(date_part(epoch_nanosecond, convert_timezone('UTC', {value})::timestamp(9))/1000000000, {coltype.precision}))"
         else: