From d58dbad076f61cfd05fa8280ad13442d29360873 Mon Sep 17 00:00:00 2001 From: Erik Ritter Date: Mon, 5 Aug 2019 11:56:56 -0700 Subject: [PATCH] [DB Engine] Support old and new Presto syntax (#7977) --- docs/installation.rst | 12 ++++++++++++ superset/db_engine_specs/presto.py | 27 +++++++++++++++++++++------ superset/views/database/__init__.py | 4 +++- tests/db_engine_specs_test.py | 2 ++ 4 files changed, 38 insertions(+), 7 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 68bc5ce06b189..919d6ed77fa53 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -647,6 +647,18 @@ Note that you can run the ``superset refresh_druid`` command to refresh the metadata from your Druid cluster(s) +Presto +------ + +By default Superset assumes the most recent version of Presto is being used when +querying the datasource. If you're using an older version of presto, you can configure +it in the ``extra`` parameter:: + + { + "version": "0.123" + } + + CORS ---- diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py index 4f4b03ef7793b..6708b0671a2c9 100644 --- a/superset/db_engine_specs/presto.py +++ b/superset/db_engine_specs/presto.py @@ -16,6 +16,7 @@ # under the License. # pylint: disable=C,R,W from collections import OrderedDict +from distutils.version import StrictVersion import logging import re import textwrap @@ -797,7 +798,7 @@ def extra_table_metadata(cls, database, table_name, schema_name): full_table_name = table_name if schema_name and "." not in table_name: full_table_name = "{}.{}".format(schema_name, table_name) - pql = cls._partition_query(full_table_name) + pql = cls._partition_query(full_table_name, database) col_names, latest_parts = cls.latest_partition( table_name, schema_name, database, show_first=True ) @@ -872,7 +873,9 @@ def extract_error_message(cls, e): return utils.error_msg_from_exception(e) @classmethod - def _partition_query(cls, table_name, limit=0, order_by=None, filters=None): + def _partition_query( + cls, table_name, database, limit=0, order_by=None, filters=None + ): """Returns a partition query :param table_name: the name of the table to get partitions from @@ -900,10 +903,20 @@ def _partition_query(cls, table_name, limit=0, order_by=None, filters=None): l.append(f"{field} = '{value}'") where_clause = "WHERE " + " AND ".join(l) + presto_version = database.get_extra().get("version") + + # Partition select syntax changed in v0.199, so check here. + # Default to the new syntax if version is unset. + partition_select_clause = ( + f'SELECT * FROM "{table_name}$partitions"' + if not presto_version + or StrictVersion(presto_version) >= StrictVersion("0.199") + else f"SHOW PARTITIONS FROM {table_name}" + ) + sql = textwrap.dedent( f"""\ - SELECT * FROM "{table_name}$partitions" - + {partition_select_clause} {where_clause} {order_by_clause} {limit_clause} @@ -965,7 +978,7 @@ def latest_partition(cls, table_name, schema, database, show_first=False): ) column_names = indexes[0]["column_names"] part_fields = [(column_name, True) for column_name in column_names] - sql = cls._partition_query(table_name, 1, part_fields) + sql = cls._partition_query(table_name, database, 1, part_fields) df = database.get_df(sql, schema) return column_names, cls._latest_partition_from_df(df) @@ -1012,7 +1025,9 @@ def latest_sub_partition(cls, table_name, schema, database, **kwargs): if field not in kwargs.keys(): field_to_return = field - sql = cls._partition_query(table_name, 1, [(field_to_return, True)], kwargs) + sql = cls._partition_query( + table_name, database, 1, [(field_to_return, True)], kwargs + ) df = database.get_df(sql, schema) if df.empty: return "" diff --git a/superset/views/database/__init__.py b/superset/views/database/__init__.py index 15b0d5228fb1d..9b7c8cf891210 100644 --- a/superset/views/database/__init__.py +++ b/superset/views/database/__init__.py @@ -143,7 +143,9 @@ class DatabaseMixin: # noqa 'Specify it as **"schemas_allowed_for_csv_upload": ' '["public", "csv_upload"]**. ' "If database flavor does not support schema or any schema is allowed " - "to be accessed, just leave the list empty", + "to be accessed, just leave the list empty" + "4. the ``version`` field is a string specifying the this db's version. " + "This should be used with Presto DBs so that the syntax is correct", True, ), "impersonate_user": _( diff --git a/tests/db_engine_specs_test.py b/tests/db_engine_specs_test.py index c9c398f8de304..a70c8cdb2ad87 100644 --- a/tests/db_engine_specs_test.py +++ b/tests/db_engine_specs_test.py @@ -766,6 +766,7 @@ def test_presto_expand_data_with_complex_array_columns(self): def test_presto_extra_table_metadata(self): db = mock.Mock() db.get_indexes = mock.Mock(return_value=[{"column_names": ["ds", "hour"]}]) + db.get_extra = mock.Mock(return_value={}) df = pd.DataFrame({"ds": ["01-01-19"], "hour": [1]}) db.get_df = mock.Mock(return_value=df) result = PrestoEngineSpec.extra_table_metadata(db, "test_table", "test_schema") @@ -774,6 +775,7 @@ def test_presto_extra_table_metadata(self): def test_presto_where_latest_partition(self): db = mock.Mock() db.get_indexes = mock.Mock(return_value=[{"column_names": ["ds", "hour"]}]) + db.get_extra = mock.Mock(return_value={}) df = pd.DataFrame({"ds": ["01-01-19"], "hour": [1]}) db.get_df = mock.Mock(return_value=df) columns = [{"name": "ds"}, {"name": "hour"}]