Skip to content

Commit

Permalink
[DB Engine] Support old and new Presto syntax (#7977)
Browse files Browse the repository at this point in the history
  • Loading branch information
Erik Ritter authored and john-bodley committed Aug 5, 2019
1 parent f7af50c commit d58dbad
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 7 deletions.
12 changes: 12 additions & 0 deletions docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,18 @@ Note that you can run the ``superset refresh_druid`` command to refresh the
metadata from your Druid cluster(s)


Presto
------

By default Superset assumes the most recent version of Presto is being used when
querying the datasource. If you're using an older version of Presto, you can configure
it in the ``extra`` parameter::

    {
        "version": "0.123"
    }


CORS
----

Expand Down
27 changes: 21 additions & 6 deletions superset/db_engine_specs/presto.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# under the License.
# pylint: disable=C,R,W
from collections import OrderedDict
from distutils.version import StrictVersion
import logging
import re
import textwrap
Expand Down Expand Up @@ -797,7 +798,7 @@ def extra_table_metadata(cls, database, table_name, schema_name):
full_table_name = table_name
if schema_name and "." not in table_name:
full_table_name = "{}.{}".format(schema_name, table_name)
pql = cls._partition_query(full_table_name)
pql = cls._partition_query(full_table_name, database)
col_names, latest_parts = cls.latest_partition(
table_name, schema_name, database, show_first=True
)
Expand Down Expand Up @@ -872,7 +873,9 @@ def extract_error_message(cls, e):
return utils.error_msg_from_exception(e)

@classmethod
def _partition_query(cls, table_name, limit=0, order_by=None, filters=None):
def _partition_query(
cls, table_name, database, limit=0, order_by=None, filters=None
):
"""Returns a partition query
:param table_name: the name of the table to get partitions from
Expand Down Expand Up @@ -900,10 +903,20 @@ def _partition_query(cls, table_name, limit=0, order_by=None, filters=None):
l.append(f"{field} = '{value}'")
where_clause = "WHERE " + " AND ".join(l)

presto_version = database.get_extra().get("version")

# Partition select syntax changed in v0.199, so check here.
# Default to the new syntax if version is unset.
partition_select_clause = (
f'SELECT * FROM "{table_name}$partitions"'
if not presto_version
or StrictVersion(presto_version) >= StrictVersion("0.199")
else f"SHOW PARTITIONS FROM {table_name}"
)

sql = textwrap.dedent(
f"""\
SELECT * FROM "{table_name}$partitions"
{partition_select_clause}
{where_clause}
{order_by_clause}
{limit_clause}
Expand Down Expand Up @@ -965,7 +978,7 @@ def latest_partition(cls, table_name, schema, database, show_first=False):
)
column_names = indexes[0]["column_names"]
part_fields = [(column_name, True) for column_name in column_names]
sql = cls._partition_query(table_name, 1, part_fields)
sql = cls._partition_query(table_name, database, 1, part_fields)
df = database.get_df(sql, schema)
return column_names, cls._latest_partition_from_df(df)

Expand Down Expand Up @@ -1012,7 +1025,9 @@ def latest_sub_partition(cls, table_name, schema, database, **kwargs):
if field not in kwargs.keys():
field_to_return = field

sql = cls._partition_query(table_name, 1, [(field_to_return, True)], kwargs)
sql = cls._partition_query(
table_name, database, 1, [(field_to_return, True)], kwargs
)
df = database.get_df(sql, schema)
if df.empty:
return ""
Expand Down
4 changes: 3 additions & 1 deletion superset/views/database/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,9 @@ class DatabaseMixin: # noqa
'Specify it as **"schemas_allowed_for_csv_upload": '
'["public", "csv_upload"]**. '
"If database flavor does not support schema or any schema is allowed "
"to be accessed, just leave the list empty",
"to be accessed, just leave the list empty"
"4. the ``version`` field is a string specifying the this db's version. "
"This should be used with Presto DBs so that the syntax is correct",
True,
),
"impersonate_user": _(
Expand Down
2 changes: 2 additions & 0 deletions tests/db_engine_specs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,7 @@ def test_presto_expand_data_with_complex_array_columns(self):
def test_presto_extra_table_metadata(self):
db = mock.Mock()
db.get_indexes = mock.Mock(return_value=[{"column_names": ["ds", "hour"]}])
db.get_extra = mock.Mock(return_value={})
df = pd.DataFrame({"ds": ["01-01-19"], "hour": [1]})
db.get_df = mock.Mock(return_value=df)
result = PrestoEngineSpec.extra_table_metadata(db, "test_table", "test_schema")
Expand All @@ -774,6 +775,7 @@ def test_presto_extra_table_metadata(self):
def test_presto_where_latest_partition(self):
db = mock.Mock()
db.get_indexes = mock.Mock(return_value=[{"column_names": ["ds", "hour"]}])
db.get_extra = mock.Mock(return_value={})
df = pd.DataFrame({"ds": ["01-01-19"], "hour": [1]})
db.get_df = mock.Mock(return_value=df)
columns = [{"name": "ds"}, {"name": "hour"}]
Expand Down

0 comments on commit d58dbad

Please sign in to comment.