fix: can't sync temporal flag on virtual table (#19366)

apache · Apr 1, 2022 · d954c3d · d954c3d
1 parent d684ad0
commit d954c3d
Show file tree

Hide file tree

Showing 15 changed files with 170 additions and 92 deletions.
diff --git a/superset-frontend/cypress-base/cypress/integration/sqllab/query.test.ts b/superset-frontend/cypress-base/cypress/integration/sqllab/query.test.ts
@@ -37,8 +37,8 @@ describe('SqlLab query panel', () => {
     const sampleResponse = {
       status: 'success',
       data: [{ '?column?': 1 }],
-      columns: [{ name: '?column?', type: 'INT', is_date: false }],
-      selected_columns: [{ name: '?column?', type: 'INT', is_date: false }],
+      columns: [{ name: '?column?', type: 'INT', is_dttm: false }],
+      selected_columns: [{ name: '?column?', type: 'INT', is_dttm: false }],
       expanded_columns: [],
     };
 

diff --git a/superset-frontend/src/SqlLab/components/ResultSet/index.tsx b/superset-frontend/src/SqlLab/components/ResultSet/index.tsx
@@ -280,7 +280,8 @@ export default class ResultSet extends React.PureComponent<
       sql,
       results.selected_columns.map(d => ({
         column_name: d.name,
-        is_dttm: d.is_date,
+        type: d.type,
+        is_dttm: d.is_dttm,
       })),
       datasetToOverwrite.owners.map((o: DatasetOwner) => o.id),
       true,

diff --git a/superset-frontend/src/SqlLab/fixtures.ts b/superset-frontend/src/SqlLab/fixtures.ts
@@ -221,24 +221,24 @@ export const queries = [
     results: {
       columns: [
         {
-          is_date: true,
+          is_dttm: true,
           name: 'ds',
           type: 'STRING',
         },
         {
-          is_date: false,
+          is_dttm: false,
           name: 'gender',
           type: 'STRING',
         },
       ],
       selected_columns: [
         {
-          is_date: true,
+          is_dttm: true,
           name: 'ds',
           type: 'STRING',
         },
         {
-          is_date: false,
+          is_dttm: false,
           name: 'gender',
           type: 'STRING',
         },
@@ -313,24 +313,24 @@ export const queryWithNoQueryLimit = {
   results: {
     columns: [
       {
-        is_date: true,
+        is_dttm: true,
         name: 'ds',
         type: 'STRING',
       },
       {
-        is_date: false,
+        is_dttm: false,
         name: 'gender',
         type: 'STRING',
       },
     ],
     selected_columns: [
       {
-        is_date: true,
+        is_dttm: true,
         name: 'ds',
         type: 'STRING',
       },
       {
-        is_date: false,
+        is_dttm: false,
         name: 'gender',
         type: 'STRING',
       },
@@ -350,57 +350,57 @@ export const queryWithBadColumns = {
     data: queries[0].results?.data,
     selected_columns: [
       {
-        is_date: true,
+        is_dttm: true,
         name: 'COUNT(*)',
         type: 'STRING',
       },
       {
-        is_date: false,
+        is_dttm: false,
         name: 'this_col_is_ok',
         type: 'STRING',
       },
       {
-        is_date: false,
+        is_dttm: false,
         name: 'a',
         type: 'STRING',
       },
       {
-        is_date: false,
+        is_dttm: false,
         name: '1',
         type: 'STRING',
       },
       {
-        is_date: false,
+        is_dttm: false,
         name: '123',
         type: 'STRING',
       },
       {
-        is_date: false,
+        is_dttm: false,
         name: 'CASE WHEN 1=1 THEN 1 ELSE 0 END',
         type: 'STRING',
       },
       {
-        is_date: true,
+        is_dttm: true,
         name: '_TIMESTAMP',
         type: 'TIMESTAMP',
       },
       {
-        is_date: true,
+        is_dttm: true,
         name: '__TIME',
         type: 'TIMESTAMP',
       },
       {
-        is_date: false,
+        is_dttm: false,
         name: 'my_dupe_col__2',
         type: 'STRING',
       },
       {
-        is_date: true,
+        is_dttm: true,
         name: '__timestamp',
         type: 'TIMESTAMP',
       },
       {
-        is_date: true,
+        is_dttm: true,
         name: '__TIMESTAMP',
         type: 'TIMESTAMP',
       },

diff --git a/superset-frontend/src/SqlLab/types.ts b/superset-frontend/src/SqlLab/types.ts
@@ -21,9 +21,11 @@ import { CtasEnum } from 'src/SqlLab/actions/sqlLab';
 import { UserWithPermissionsAndRoles } from 'src/types/bootstrapTypes';
 import { ToastType } from 'src/components/MessageToasts/types';
 
+// same as superset.result_set.ResultSetColumnType
 export type Column = {
   name: string;
-  is_date?: boolean;
+  type: string | null;
+  is_dttm: boolean;
 };
 
 export type QueryState =

diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py
@@ -687,8 +687,9 @@ def sql_url(self) -> str:
         return self.database.sql_url + "?table_name=" + str(self.table_name)
 
     def external_metadata(self) -> List[Dict[str, str]]:
+        # todo(yongjie): create a pysical table column type in seprated PR
         if self.sql:
-            return get_virtual_table_metadata(dataset=self)
+            return get_virtual_table_metadata(dataset=self)  # type: ignore
         return get_physical_table_metadata(
             database=self.database,
             table_name=self.table_name,

diff --git a/superset/connectors/sqla/utils.py b/superset/connectors/sqla/utils.py
@@ -34,6 +34,7 @@
 from superset.models.core import Database
 from superset.result_set import SupersetResultSet
 from superset.sql_parse import has_table_query, insert_rls, ParsedQuery, Table
+from superset.superset_typing import ResultSetColumnType
 from superset.tables.models import Table as NewTable
 
 if TYPE_CHECKING:
@@ -91,7 +92,7 @@ def get_physical_table_metadata(
     return cols
 
 
-def get_virtual_table_metadata(dataset: "SqlaTable") -> List[Dict[str, str]]:
+def get_virtual_table_metadata(dataset: "SqlaTable") -> List[ResultSetColumnType]:
     """Use SQLparser to get virtual dataset metadata"""
     if not dataset.sql:
         raise SupersetGenericDBErrorException(

diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py
@@ -62,6 +62,7 @@
 from superset.models.sql_lab import Query
 from superset.models.sql_types.base import literal_dttm_type_factory
 from superset.sql_parse import ParsedQuery, Table
+from superset.superset_typing import ResultSetColumnType
 from superset.utils import core as utils
 from superset.utils.core import ColumnSpec, GenericDataType
 from superset.utils.hashing import md5_sha_from_str
@@ -566,8 +567,10 @@ def fetch_data(
 
     @classmethod
     def expand_data(
-        cls, columns: List[Dict[Any, Any]], data: List[Dict[Any, Any]]
-    ) -> Tuple[List[Dict[Any, Any]], List[Dict[Any, Any]], List[Dict[Any, Any]]]:
+        cls, columns: List[ResultSetColumnType], data: List[Dict[Any, Any]]
+    ) -> Tuple[
+        List[ResultSetColumnType], List[Dict[Any, Any]], List[ResultSetColumnType]
+    ]:
         """
         Some engines support expanding nested fields. See implementation in Presto
         spec for details.

diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py
@@ -53,6 +53,7 @@
 )
 from superset.result_set import destringify
 from superset.sql_parse import ParsedQuery
+from superset.superset_typing import ResultSetColumnType
 from superset.utils import core as utils
 from superset.utils.core import ColumnSpec, GenericDataType
 
@@ -85,24 +86,26 @@
 logger = logging.getLogger(__name__)
 
 
-def get_children(column: Dict[str, str]) -> List[Dict[str, str]]:
+def get_children(column: ResultSetColumnType) -> List[ResultSetColumnType]:
     """
     Get the children of a complex Presto type (row or array).
 
     For arrays, we return a single list with the base type:
 
-        >>> get_children(dict(name="a", type="ARRAY(BIGINT)"))
-        [{"name": "a", "type": "BIGINT"}]
+        >>> get_children(dict(name="a", type="ARRAY(BIGINT)", is_dttm=False))
+        [{"name": "a", "type": "BIGINT", "is_dttm": False}]
 
     For rows, we return a list of the columns:
 
-        >>> get_children(dict(name="a", type="ROW(BIGINT,FOO VARCHAR)"))
-        [{'name': 'a._col0', 'type': 'BIGINT'}, {'name': 'a.foo', 'type': 'VARCHAR'}]
+        >>> get_children(dict(name="a", type="ROW(BIGINT,FOO VARCHAR)",  is_dttm=False))
+        [{'name': 'a._col0', 'type': 'BIGINT', 'is_dttm': False}, {'name': 'a.foo', 'type': 'VARCHAR', 'is_dttm': False}]  # pylint: disable=line-too-long
 
     :param column: dictionary representing a Presto column
     :return: list of dictionaries representing children columns
     """
     pattern = re.compile(r"(?P<type>\w+)\((?P<children>.*)\)")
+    if not column["type"]:
+        raise ValueError
     match = pattern.match(column["type"])
     if not match:
         raise Exception(f"Unable to parse column type {column['type']}")
@@ -111,7 +114,7 @@ def get_children(column: Dict[str, str]) -> List[Dict[str, str]]:
     type_ = group["type"].upper()
     children_type = group["children"]
     if type_ == "ARRAY":
-        return [{"name": column["name"], "type": children_type}]
+        return [{"name": column["name"], "type": children_type, "is_dttm": False}]
 
     if type_ == "ROW":
         nameless_columns = 0
@@ -125,7 +128,12 @@ def get_children(column: Dict[str, str]) -> List[Dict[str, str]]:
                 name = f"_col{nameless_columns}"
                 type_ = parts[0]
                 nameless_columns += 1
-            columns.append({"name": f"{column['name']}.{name.lower()}", "type": type_})
+            _column: ResultSetColumnType = {
+                "name": f"{column['name']}.{name.lower()}",
+                "type": type_,
+                "is_dttm": False,
+            }
+            columns.append(_column)
         return columns
 
     raise Exception(f"Unknown type {type_}!")
@@ -779,8 +787,10 @@ def get_all_datasource_names(
 
     @classmethod
     def expand_data(  # pylint: disable=too-many-locals
-        cls, columns: List[Dict[Any, Any]], data: List[Dict[Any, Any]]
-    ) -> Tuple[List[Dict[Any, Any]], List[Dict[Any, Any]], List[Dict[Any, Any]]]:
+        cls, columns: List[ResultSetColumnType], data: List[Dict[Any, Any]]
+    ) -> Tuple[
+        List[ResultSetColumnType], List[Dict[Any, Any]], List[ResultSetColumnType]
+    ]:
         """
         We do not immediately display rows and arrays clearly in the data grid. This
         method separates out nested fields and data values to help clearly display
@@ -808,7 +818,7 @@ def expand_data(  # pylint: disable=too-many-locals
         # process each column, unnesting ARRAY types and
         # expanding ROW types into new columns
         to_process = deque((column, 0) for column in columns)
-        all_columns: List[Dict[str, Any]] = []
+        all_columns: List[ResultSetColumnType] = []
         expanded_columns = []
         current_array_level = None
         while to_process:
@@ -828,7 +838,7 @@ def expand_data(  # pylint: disable=too-many-locals
             name = column["name"]
             values: Optional[Union[str, List[Any]]]
 
-            if column["type"].startswith("ARRAY("):
+            if column["type"] and column["type"].startswith("ARRAY("):
                 # keep processing array children; we append to the right so that
                 # multiple nested arrays are processed breadth-first
                 to_process.append((get_children(column)[0], level + 1))
@@ -862,7 +872,7 @@ def expand_data(  # pylint: disable=too-many-locals
 
                     i += 1
 
-            if column["type"].startswith("ROW("):
+            if column["type"] and column["type"].startswith("ROW("):
                 # expand columns; we append them to the left so they are added
                 # immediately after the parent
                 expanded = get_children(column)

diff --git a/superset/result_set.py b/superset/result_set.py
@@ -26,7 +26,7 @@
 import pyarrow as pa
 
 from superset.db_engine_specs import BaseEngineSpec
-from superset.superset_typing import DbapiDescription, DbapiResult
+from superset.superset_typing import DbapiDescription, DbapiResult, ResultSetColumnType
 from superset.utils import core as utils
 
 logger = logging.getLogger(__name__)
@@ -210,17 +210,17 @@ def size(self) -> int:
         return self.table.num_rows
 
     @property
-    def columns(self) -> List[Dict[str, Any]]:
+    def columns(self) -> List[ResultSetColumnType]:
         if not self.table.column_names:
             return []
 
         columns = []
         for col in self.table.schema:
             db_type_str = self.data_type(col.name, col.type)
-            column = {
+            column: ResultSetColumnType = {
                 "name": col.name,
                 "type": db_type_str,
-                "is_date": self.is_temporal(db_type_str),
+                "is_dttm": self.is_temporal(db_type_str),
             }
             columns.append(column)
 

diff --git a/superset/superset_typing.py b/superset/superset_typing.py
@@ -57,6 +57,16 @@ class AdhocColumn(TypedDict, total=False):
     sqlExpression: Optional[str]
 
 
+class ResultSetColumnType(TypedDict):
+    """
+    Superset virtual dataset column interface
+    """
+
+    name: str
+    type: Optional[str]
+    is_dttm: bool
+
+
 CacheConfig = Dict[str, Any]
 DbapiDescriptionRow = Tuple[
     str, str, Optional[str], Optional[str], Optional[int], Optional[int], bool

diff --git a/superset/views/core.py b/superset/views/core.py
@@ -2109,7 +2109,7 @@ def sqllab_viz(self) -> FlaskResponse:  # pylint: disable=no-self-use
                 column_name=column_name,
                 filterable=True,
                 groupby=True,
-                is_dttm=config_.get("is_date", False),
+                is_dttm=config_.get("is_dttm", False),
                 type=config_.get("type", False),
             )
             cols.append(col)

diff --git a/tests/integration_tests/celery_tests.py b/tests/integration_tests/celery_tests.py
@@ -130,8 +130,8 @@ def cta_result(ctas_method: CtasMethod):
     if backend() != "presto":
         return [], []
     if ctas_method == CtasMethod.TABLE:
-        return [{"rows": 1}], [{"name": "rows", "type": "BIGINT", "is_date": False}]
-    return [{"result": True}], [{"name": "result", "type": "BOOLEAN", "is_date": False}]
+        return [{"rows": 1}], [{"name": "rows", "type": "BIGINT", "is_dttm": False}]
+    return [{"result": True}], [{"name": "result", "type": "BOOLEAN", "is_dttm": False}]
 
 
 # TODO(bkyryliuk): quote table and schema names for all databases