[DB Engine] Support old and new Presto syntax (#7977)

This commit is contained in:
Erik Ritter 2019-08-05 11:56:56 -07:00 committed by John Bodley
parent f7af50c399
commit d58dbad076
4 changed files with 38 additions and 7 deletions

View File

@ -647,6 +647,18 @@ Note that you can run the ``superset refresh_druid`` command to refresh the
metadata from your Druid cluster(s)
Presto
------
By default Superset assumes the most recent version of Presto is being used when
querying the datasource. If you're using an older version of presto, you can configure
it in the ``extra`` parameter::
{
"version": "0.123"
}
CORS
----

View File

@ -16,6 +16,7 @@
# under the License.
# pylint: disable=C,R,W
from collections import OrderedDict
from distutils.version import StrictVersion
import logging
import re
import textwrap
@ -797,7 +798,7 @@ class PrestoEngineSpec(BaseEngineSpec):
full_table_name = table_name
if schema_name and "." not in table_name:
full_table_name = "{}.{}".format(schema_name, table_name)
pql = cls._partition_query(full_table_name)
pql = cls._partition_query(full_table_name, database)
col_names, latest_parts = cls.latest_partition(
table_name, schema_name, database, show_first=True
)
@ -872,7 +873,9 @@ class PrestoEngineSpec(BaseEngineSpec):
return utils.error_msg_from_exception(e)
@classmethod
def _partition_query(cls, table_name, limit=0, order_by=None, filters=None):
def _partition_query(
cls, table_name, database, limit=0, order_by=None, filters=None
):
"""Returns a partition query
:param table_name: the name of the table to get partitions from
@ -900,10 +903,20 @@ class PrestoEngineSpec(BaseEngineSpec):
l.append(f"{field} = '{value}'")
where_clause = "WHERE " + " AND ".join(l)
presto_version = database.get_extra().get("version")
# Partition select syntax changed in v0.199, so check here.
# Default to the new syntax if version is unset.
partition_select_clause = (
f'SELECT * FROM "{table_name}$partitions"'
if not presto_version
or StrictVersion(presto_version) >= StrictVersion("0.199")
else f"SHOW PARTITIONS FROM {table_name}"
)
sql = textwrap.dedent(
f"""\
SELECT * FROM "{table_name}$partitions"
{partition_select_clause}
{where_clause}
{order_by_clause}
{limit_clause}
@ -965,7 +978,7 @@ class PrestoEngineSpec(BaseEngineSpec):
)
column_names = indexes[0]["column_names"]
part_fields = [(column_name, True) for column_name in column_names]
sql = cls._partition_query(table_name, 1, part_fields)
sql = cls._partition_query(table_name, database, 1, part_fields)
df = database.get_df(sql, schema)
return column_names, cls._latest_partition_from_df(df)
@ -1012,7 +1025,9 @@ class PrestoEngineSpec(BaseEngineSpec):
if field not in kwargs.keys():
field_to_return = field
sql = cls._partition_query(table_name, 1, [(field_to_return, True)], kwargs)
sql = cls._partition_query(
table_name, database, 1, [(field_to_return, True)], kwargs
)
df = database.get_df(sql, schema)
if df.empty:
return ""

View File

@ -143,7 +143,9 @@ class DatabaseMixin: # noqa
'Specify it as **"schemas_allowed_for_csv_upload": '
'["public", "csv_upload"]**. '
"If database flavor does not support schema or any schema is allowed "
"to be accessed, just leave the list empty",
"to be accessed, just leave the list empty"
"4. the ``version`` field is a string specifying the this db's version. "
"This should be used with Presto DBs so that the syntax is correct",
True,
),
"impersonate_user": _(

View File

@ -766,6 +766,7 @@ class DbEngineSpecsTestCase(SupersetTestCase):
def test_presto_extra_table_metadata(self):
db = mock.Mock()
db.get_indexes = mock.Mock(return_value=[{"column_names": ["ds", "hour"]}])
db.get_extra = mock.Mock(return_value={})
df = pd.DataFrame({"ds": ["01-01-19"], "hour": [1]})
db.get_df = mock.Mock(return_value=df)
result = PrestoEngineSpec.extra_table_metadata(db, "test_table", "test_schema")
@ -774,6 +775,7 @@ class DbEngineSpecsTestCase(SupersetTestCase):
def test_presto_where_latest_partition(self):
db = mock.Mock()
db.get_indexes = mock.Mock(return_value=[{"column_names": ["ds", "hour"]}])
db.get_extra = mock.Mock(return_value={})
df = pd.DataFrame({"ds": ["01-01-19"], "hour": [1]})
db.get_df = mock.Mock(return_value=df)
columns = [{"name": "ds"}, {"name": "hour"}]