From 9ff351532ada993afc4da5060b9a942216e2a74b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Vavru=C5=A1a?= Date: Wed, 3 May 2017 13:57:39 -0700 Subject: [PATCH] Basic integration with ClickHouse (#1844) * db_engine_specs: added engine spec for clickhouse * models: group by time without join on subquery if this is enabled, it will remove the subquery when grouping by time, and group by time will be rewritten as group by timeslot, ... this particularly fixes clickhouse backend * docs: added clickhouse * models: allow using secondary dttm columns as constraints * superset/forms: configurable default slice since time * superset/models: respect dialect quoting * dttm handling, codeclimate tweaks --- docs/installation.rst | 3 +++ superset/connectors/sqla/models.py | 22 +++++++++++----- superset/db_engine_specs.py | 40 ++++++++++++++++++++++++++++++ superset/viz.py | 4 ++- 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 8b11312ca..71907022e 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -211,6 +211,9 @@ Here's a list of some of the recommended packages. | Vertica | ``pip install | ``vertica+vertica_python://`` | | | sqlalchemy-vertica-python`` | | +---------------+-------------------------------------+-------------------------------------------------+ +| ClickHouse | ``pip install | ``clickhouse://`` | +| | sqlalchemy-clickhouse`` | | ++---------------+-------------------------------------+-------------------------------------------------+ Note that many other database are supported, the main criteria being the existence of a functional SqlAlchemy dialect and Python driver. 
diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 913ee5957..ba0131d78 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -370,6 +370,9 @@ class SqlaTable(Model, BaseDatasource): if granularity not in self.dttm_cols: granularity = self.main_dttm_col + # Database spec supports join-free timeslot grouping + time_groupby_inline = self.database.db_engine_spec.time_groupby_inline + cols = {col.column_name: col for col in self.columns} metrics_dict = {m.metric_name: m for m in self.metrics} @@ -386,7 +389,7 @@ class SqlaTable(Model, BaseDatasource): if timeseries_limit_metric: timeseries_limit_metric_expr = \ timeseries_limit_metric.sqla_col - if metrics: + if metrics_exprs: main_metric_expr = metrics_exprs[0] else: main_metric_expr = literal_column("COUNT(*)").label("ccount") @@ -436,13 +439,20 @@ class SqlaTable(Model, BaseDatasource): dttm_col = cols[granularity] time_grain = extras.get('time_grain_sqla') + time_filters = [] if is_timeseries: timestamp = dttm_col.get_timestamp_expression(time_grain) select_exprs += [timestamp] groupby_exprs += [timestamp] - time_filter = dttm_col.get_time_filter(from_dttm, to_dttm) + # Use main dttm column to support index with secondary dttm columns + if self.database.db_engine_spec.time_secondary_columns and \ + self.main_dttm_col in self.dttm_cols and \ + self.main_dttm_col != dttm_col.column_name: + time_filters.append(cols[self.main_dttm_col]. 
+ get_time_filter(from_dttm, to_dttm)) + time_filters.append(dttm_col.get_time_filter(from_dttm, to_dttm)) select_exprs += metrics_exprs qry = sa.select(select_exprs) @@ -509,7 +519,7 @@ class SqlaTable(Model, BaseDatasource): having = template_processor.process_template(having) having_clause_and += [sa.text('({})'.format(having))] if granularity: - qry = qry.where(and_(*([time_filter] + where_clause_and))) + qry = qry.where(and_(*(time_filters + where_clause_and))) else: qry = qry.where(and_(*where_clause_and)) qry = qry.having(and_(*having_clause_and)) @@ -522,7 +532,8 @@ class SqlaTable(Model, BaseDatasource): qry = qry.limit(row_limit) - if is_timeseries and timeseries_limit and groupby: + if is_timeseries and \ + timeseries_limit and groupby and not time_groupby_inline: # some sql dialects require for order by expressions # to also be in the select clause -- others, e.g. vertica, # require a unique inner alias @@ -620,8 +631,7 @@ class SqlaTable(Model, BaseDatasource): if not any_date_col and dbcol.is_time: any_date_col = col.name - quoted = "{}".format( - column(dbcol.column_name).compile(dialect=db_dialect)) + quoted = "{}".format(col.compile(dialect=db_dialect)) if dbcol.sum: metrics.append(M( metric_name='sum__' + dbcol.column_name, diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py index 75db166cb..ec0689642 100644 --- a/superset/db_engine_specs.py +++ b/superset/db_engine_specs.py @@ -50,7 +50,9 @@ class BaseEngineSpec(object): engine = 'base' # str as defined in sqlalchemy.engine.engine cursor_execute_kwargs = {} time_grains = tuple() + time_groupby_inline = False limit_method = LimitMethod.FETCH_MANY + time_secondary_columns = False @classmethod def fetch_data(cls, cursor, limit): @@ -865,6 +867,44 @@ class AthenaEngineSpec(BaseEngineSpec): def epoch_to_dttm(cls): return "from_unixtime({col})" + +class ClickHouseEngineSpec(BaseEngineSpec): + """Dialect for ClickHouse analytical DB.""" + + engine = 'clickhouse' + + 
time_secondary_columns = True + time_groupby_inline = True + time_grains = ( + Grain('Time Column', _('Time Column'), '{col}'), + Grain('minute', _('minute'), + "toStartOfMinute(toDateTime({col}))"), + Grain('5 minute', _('5 minute'), + "toDateTime(intDiv(toUInt32(toDateTime({col})), 300)*300)"), + Grain('10 minute', _('10 minute'), + "toDateTime(intDiv(toUInt32(toDateTime({col})), 600)*600)"), + Grain('hour', _('hour'), + "toStartOfHour(toDateTime({col}))"), + Grain('day', _('day'), + "toStartOfDay(toDateTime({col}))"), + Grain('month', _('month'), + "toStartOfMonth(toDateTime({col}))"), + Grain('quarter', _('quarter'), + "toStartOfQuarter(toDateTime({col}))"), + Grain('year', _('year'), + "toStartOfYear(toDateTime({col}))"), + ) + + @classmethod + def convert_dttm(cls, target_type, dttm): + tt = target_type.upper() + if tt == 'DATE': + return "toDate('{}')".format(dttm.strftime('%Y-%m-%d')) + if tt == 'DATETIME': + return "toDateTime('{}')".format( + dttm.strftime('%Y-%m-%d %H:%M:%S')) + return "'{}'".format(dttm.strftime('%Y-%m-%d %H:%M:%S')) + engines = { o.engine: o for o in globals().values() if inspect.isclass(o) and issubclass(o, BaseEngineSpec)} diff --git a/superset/viz.py b/superset/viz.py index 608e98a4d..bc147bb82 100755 --- a/superset/viz.py +++ b/superset/viz.py @@ -130,7 +130,9 @@ class BaseViz(object): # [column_name in list_of_values]. `__` prefix is there to avoid # potential conflicts with column that would be named `from` or `to` since = ( - extra_filters.get('__from') or form_data.get("since", "1 year ago") + extra_filters.get('__from') or + form_data.get("since") or + config.get("SUPERSET_DEFAULT_SINCE", "1 year ago") ) from_dttm = utils.parse_human_datetime(since)