Using inheritance scheme to organize db specific code (#1294)

* Using inheritance scheme to organize db specific code

* Addressing comments
This commit is contained in:
Maxime Beauchemin 2016-10-12 13:50:47 -07:00 committed by GitHub
parent 8626c80d3a
commit ef2670ca32
7 changed files with 248 additions and 133 deletions

View File

@ -212,7 +212,7 @@ def load_world_bank_health_n_pop():
"metrics": ["sum__SP_POP_TOTL"],
"row_limit": config.get("ROW_LIMIT"),
"since": "2014-01-01",
"until": "2014-01-01",
"until": "2014-01-02",
"where": "",
"markup_type": "markdown",
"country_fieldtype": "cca3",
@ -285,7 +285,7 @@ def load_world_bank_health_n_pop():
defaults,
viz_type='bubble',
since="2011-01-01",
until="2011-01-01",
until="2011-01-02",
series="region",
limit="0",
entity="country_name",

213
caravel/db_engine_specs.py Normal file
View File

@ -0,0 +1,213 @@
"""Compatibility layer for different database engines
This modules stores logic specific to different database engines. Things
like time-related functions that are similar but not identical, or
information as to expose certain features or not and how to expose them.
For instance, Hive/Presto supports partitions and have a specific API to
list partitions. Other databases like Vertica also support partitions but
have different API to get to them. Other databases don't support partitions
at all. The classes here will use a common interface to specify all this.
The general idea is to use static classes and an inheritance scheme.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import inspect
from collections import namedtuple
from flask_babel import lazy_gettext as _
Grain = namedtuple('Grain', 'name label function')
class BaseEngineSpec(object):
engine = 'base' # str as defined in sqlalchemy.engine.engine
time_grains = tuple()
@classmethod
def epoch_to_dttm(cls):
raise NotImplementedError()
@classmethod
def epoch_ms_to_dttm(cls):
return cls.epoch_to_dttm().replace('{col}', '({col}/1000.0)')
@classmethod
def extra_table_metadata(cls, table):
"""Returns engine-specific table metadata"""
return {}
@classmethod
def convert_dttm(cls, target_type, dttm):
return "'{}'".format(dttm.strftime('%Y-%m-%d %H:%M:%S'))
class PostgresEngineSpec(BaseEngineSpec):
engine = 'postgres'
time_grains = (
Grain("Time Column", _('Time Column'), "{col}"),
Grain("second", _('second'), "DATE_TRUNC('second', {col})"),
Grain("minute", _('minute'), "DATE_TRUNC('minute', {col})"),
Grain("hour", _('hour'), "DATE_TRUNC('hour', {col})"),
Grain("day", _('day'), "DATE_TRUNC('day', {col})"),
Grain("week", _('week'), "DATE_TRUNC('week', {col})"),
Grain("month", _('month'), "DATE_TRUNC('month', {col})"),
Grain("year", _('year'), "DATE_TRUNC('year', {col})"),
)
@classmethod
def epoch_to_dttm(cls):
return "(timestamp 'epoch' + {col} * interval '1 second')"
@classmethod
def convert_dttm(cls, target_type, dttm):
return "'{}'".format(dttm.strftime('%Y-%m-%d %H:%M:%S'))
class SqliteEngineSpec(BaseEngineSpec):
engine = 'sqlite'
time_grains = (
Grain('Time Column', _('Time Column'), '{col}'),
Grain('day', _('day'), 'DATE({col})'),
Grain("week", _('week'),
"DATE({col}, -strftime('%w', {col}) || ' days')"),
Grain("month", _('month'),
"DATE({col}, -strftime('%d', {col}) || ' days')"),
)
@classmethod
def epoch_to_dttm(cls):
return "datetime({col}, 'unixepoch')"
@classmethod
def convert_dttm(cls, target_type, dttm):
iso = dttm.isoformat().replace('T', ' ')
if '.' not in iso:
iso += '.000000'
return "'{}'".format(iso)
class MySQLEngineSpec(BaseEngineSpec):
engine = 'mysql'
time_grains = (
Grain('Time Column', _('Time Column'), '{col}'),
Grain("second", _('second'), "DATE_ADD(DATE({col}), "
"INTERVAL (HOUR({col})*60*60 + MINUTE({col})*60"
" + SECOND({col})) SECOND)"),
Grain("minute", _('minute'), "DATE_ADD(DATE({col}), "
"INTERVAL (HOUR({col})*60 + MINUTE({col})) MINUTE)"),
Grain("hour", _('hour'), "DATE_ADD(DATE({col}), "
"INTERVAL HOUR({col}) HOUR)"),
Grain('day', _('day'), 'DATE({col})'),
Grain("week", _('week'), "DATE(DATE_SUB({col}, "
"INTERVAL DAYOFWEEK({col}) - 1 DAY))"),
Grain("month", _('month'), "DATE(DATE_SUB({col}, "
"INTERVAL DAYOFMONTH({col}) - 1 DAY))"),
)
@classmethod
def convert_dttm(cls, target_type, dttm):
if target_type.upper() in ('DATETIME', 'DATE'):
return "STR_TO_DATE('{}', '%Y-%m-%d %H:%i:%s')".format(
dttm.strftime('%Y-%m-%d %H:%M:%S'))
return "'{}'".format(dttm.strftime('%Y-%m-%d %H:%M:%S'))
@classmethod
def epoch_to_dttm(cls):
return "from_unixtime({col})"
class PrestoEngineSpec(BaseEngineSpec):
engine = 'presto'
time_grains = (
Grain('Time Column', _('Time Column'), '{col}'),
Grain('second', _('second'),
"date_trunc('second', CAST({col} AS TIMESTAMP))"),
Grain('minute', _('minute'),
"date_trunc('minute', CAST({col} AS TIMESTAMP))"),
Grain('hour', _('hour'),
"date_trunc('hour', CAST({col} AS TIMESTAMP))"),
Grain('day', _('day'),
"date_trunc('day', CAST({col} AS TIMESTAMP))"),
Grain('week', _('week'),
"date_trunc('week', CAST({col} AS TIMESTAMP))"),
Grain('month', _('month'),
"date_trunc('month', CAST({col} AS TIMESTAMP))"),
Grain('quarter', _('quarter'),
"date_trunc('quarter', CAST({col} AS TIMESTAMP))"),
Grain("week_ending_saturday", _('week_ending_saturday'),
"date_add('day', 5, date_trunc('week', date_add('day', 1, "
"CAST({col} AS TIMESTAMP))))"),
Grain("week_start_sunday", _('week_start_sunday'),
"date_add('day', -1, date_trunc('week', "
"date_add('day', 1, CAST({col} AS TIMESTAMP))))"),
)
@classmethod
def convert_dttm(cls, target_type, dttm):
if target_type.upper() in ('DATE', 'DATETIME'):
return "from_iso8601_date('{}')".format(dttm.isoformat())
return "'{}'".format(dttm.strftime('%Y-%m-%d %H:%M:%S'))
@classmethod
def epoch_to_dttm(cls):
return "from_unixtime({col})"
class MssqlEngineSpec(BaseEngineSpec):
engine = 'mssql'
epoch_to_dttm = "dateadd(S, {col}, '1970-01-01')"
time_grains = (
Grain("Time Column", _('Time Column'), "{col}"),
Grain("second", _('second'), "DATEADD(second, "
"DATEDIFF(second, '2000-01-01', {col}), '2000-01-01')"),
Grain("minute", _('minute'), "DATEADD(minute, "
"DATEDIFF(minute, 0, {col}), 0)"),
Grain("5 minute", _('5 minute'), "DATEADD(minute, "
"DATEDIFF(minute, 0, {col}) / 5 * 5, 0)"),
Grain("half hour", _('half hour'), "DATEADD(minute, "
"DATEDIFF(minute, 0, {col}) / 30 * 30, 0)"),
Grain("hour", _('hour'), "DATEADD(hour, "
"DATEDIFF(hour, 0, {col}), 0)"),
Grain("day", _('day'), "DATEADD(day, "
"DATEDIFF(day, 0, {col}), 0)"),
Grain("week", _('week'), "DATEADD(week, "
"DATEDIFF(week, 0, {col}), 0)"),
Grain("month", _('month'), "DATEADD(month, "
"DATEDIFF(month, 0, {col}), 0)"),
Grain("quarter", _('quarter'), "DATEADD(quarter, "
"DATEDIFF(quarter, 0, {col}), 0)"),
Grain("year", _('year'), "DATEADD(year, "
"DATEDIFF(year, 0, {col}), 0)"),
)
@classmethod
def convert_dttm(cls, target_type, dttm):
return "CONVERT(DATETIME, '{}', 126)".format(iso)
class RedshiftEngineSpec(PostgresEngineSpec):
engine = 'redshift'
class OracleEngineSpec(PostgresEngineSpec):
engine = 'oracle'
@classmethod
def convert_dttm(cls, target_type, dttm):
return (
"""TO_TIMESTAMP('{}', 'YYYY-MM-DD"T"HH24:MI:SS.ff6')"""
).format(dttm.isoformat())
class VerticaEngineSpec(PostgresEngineSpec):
engine = 'vertica'
engines = {
o.engine: o for o in globals().values()
if inspect.isclass(o) and issubclass(o, BaseEngineSpec)}

View File

@ -52,7 +52,7 @@ from sqlalchemy_utils import EncryptedType
from werkzeug.datastructures import ImmutableMultiDict
import caravel
from caravel import app, db, get_session, utils, sm
from caravel import app, db, db_engine_specs, get_session, utils, sm
from caravel.source_registry import SourceRegistry
from caravel.viz import viz_types
from caravel.utils import flasher, MetricPermException, DimSelector
@ -277,6 +277,12 @@ class Slice(Model, AuditMixinNullable, ImportMixin):
"{obj.datasource_id}/".format(obj=self))
return href(slice_params)
@property
def slice_id_url(self):
return (
"/caravel/{slc.datasource_type}/{slc.datasource_id}/{slc.id}/"
).format(slc=self)
@property
def edit_url(self):
return "/slicemodelview/edit/{}".format(self.id)
@ -678,6 +684,12 @@ class Database(Model, AuditMixinNullable):
def all_schema_names(self):
return sorted(self.inspector.get_schema_names())
@property
def db_engine_spec(self):
engine_name = self.get_sqla_engine().name or 'base'
return db_engine_specs.engines.get(
engine_name, db_engine_specs.BaseEngineSpec)
def grains(self):
"""Defines time granularity database-specific expressions.
@ -687,113 +699,11 @@ class Database(Model, AuditMixinNullable):
each database has slightly different but similar datetime functions,
this allows a mapping between database engines and actual functions.
"""
Grain = namedtuple('Grain', 'name label function')
db_time_grains = {
'presto': (
Grain('Time Column', _('Time Column'), '{col}'),
Grain('second', _('second'),
"date_trunc('second', CAST({col} AS TIMESTAMP))"),
Grain('minute', _('minute'),
"date_trunc('minute', CAST({col} AS TIMESTAMP))"),
Grain('hour', _('hour'),
"date_trunc('hour', CAST({col} AS TIMESTAMP))"),
Grain('day', _('day'),
"date_trunc('day', CAST({col} AS TIMESTAMP))"),
Grain('week', _('week'),
"date_trunc('week', CAST({col} AS TIMESTAMP))"),
Grain('month', _('month'),
"date_trunc('month', CAST({col} AS TIMESTAMP))"),
Grain('quarter', _('quarter'),
"date_trunc('quarter', CAST({col} AS TIMESTAMP))"),
Grain("week_ending_saturday", _('week_ending_saturday'),
"date_add('day', 5, date_trunc('week', date_add('day', 1, "
"CAST({col} AS TIMESTAMP))))"),
Grain("week_start_sunday", _('week_start_sunday'),
"date_add('day', -1, date_trunc('week', "
"date_add('day', 1, CAST({col} AS TIMESTAMP))))"),
),
'mysql': (
Grain('Time Column', _('Time Column'), '{col}'),
Grain("second", _('second'), "DATE_ADD(DATE({col}), "
"INTERVAL (HOUR({col})*60*60 + MINUTE({col})*60"
" + SECOND({col})) SECOND)"),
Grain("minute", _('minute'), "DATE_ADD(DATE({col}), "
"INTERVAL (HOUR({col})*60 + MINUTE({col})) MINUTE)"),
Grain("hour", _('hour'), "DATE_ADD(DATE({col}), "
"INTERVAL HOUR({col}) HOUR)"),
Grain('day', _('day'), 'DATE({col})'),
Grain("week", _('week'), "DATE(DATE_SUB({col}, "
"INTERVAL DAYOFWEEK({col}) - 1 DAY))"),
Grain("month", _('month'), "DATE(DATE_SUB({col}, "
"INTERVAL DAYOFMONTH({col}) - 1 DAY))"),
),
'sqlite': (
Grain('Time Column', _('Time Column'), '{col}'),
Grain('day', _('day'), 'DATE({col})'),
Grain("week", _('week'),
"DATE({col}, -strftime('%w', {col}) || ' days')"),
Grain("month", _('month'),
"DATE({col}, -strftime('%d', {col}) || ' days')"),
),
'postgresql': (
Grain("Time Column", _('Time Column'), "{col}"),
Grain("second", _('second'), "DATE_TRUNC('second', {col})"),
Grain("minute", _('minute'), "DATE_TRUNC('minute', {col})"),
Grain("hour", _('hour'), "DATE_TRUNC('hour', {col})"),
Grain("day", _('day'), "DATE_TRUNC('day', {col})"),
Grain("week", _('week'), "DATE_TRUNC('week', {col})"),
Grain("month", _('month'), "DATE_TRUNC('month', {col})"),
Grain("year", _('year'), "DATE_TRUNC('year', {col})"),
),
'mssql': (
Grain("Time Column", _('Time Column'), "{col}"),
Grain("second", _('second'), "DATEADD(second, "
"DATEDIFF(second, '2000-01-01', {col}), '2000-01-01')"),
Grain("minute", _('minute'), "DATEADD(minute, "
"DATEDIFF(minute, 0, {col}), 0)"),
Grain("5 minute", _('5 minute'), "DATEADD(minute, "
"DATEDIFF(minute, 0, {col}) / 5 * 5, 0)"),
Grain("half hour", _('half hour'), "DATEADD(minute, "
"DATEDIFF(minute, 0, {col}) / 30 * 30, 0)"),
Grain("hour", _('hour'), "DATEADD(hour, "
"DATEDIFF(hour, 0, {col}), 0)"),
Grain("day", _('day'), "DATEADD(day, "
"DATEDIFF(day, 0, {col}), 0)"),
Grain("week", _('week'), "DATEADD(week, "
"DATEDIFF(week, 0, {col}), 0)"),
Grain("month", _('month'), "DATEADD(month, "
"DATEDIFF(month, 0, {col}), 0)"),
Grain("quarter", _('quarter'), "DATEADD(quarter, "
"DATEDIFF(quarter, 0, {col}), 0)"),
Grain("year", _('year'), "DATEADD(year, "
"DATEDIFF(year, 0, {col}), 0)"),
),
}
db_time_grains['redshift'] = db_time_grains['postgresql']
db_time_grains['vertica'] = db_time_grains['postgresql']
for db_type, grains in db_time_grains.items():
if self.sqlalchemy_uri.startswith(db_type):
return grains
return self.db_engine_spec.time_grains
def grains_dict(self):
return {grain.name: grain for grain in self.grains()}
def epoch_to_dttm(self, ms=False):
"""Database-specific SQL to convert unix timestamp to datetime
"""
ts2date_exprs = {
'sqlite': "datetime({col}, 'unixepoch')",
'postgresql': "(timestamp 'epoch' + {col} * interval '1 second')",
'mysql': "from_unixtime({col})",
'mssql': "dateadd(S, {col}, '1970-01-01')"
}
ts2date_exprs['redshift'] = ts2date_exprs['postgresql']
ts2date_exprs['vertica'] = ts2date_exprs['postgresql']
for db_type, expr in ts2date_exprs.items():
if self.sqlalchemy_uri.startswith(db_type):
return expr.replace('{col}', '({col}/1000.0)') if ms else expr
raise Exception(_("Unable to convert unix epoch to datetime"))
def get_extra(self):
extra = {}
if self.extra:
@ -1028,12 +938,13 @@ class SqlaTable(Model, Queryable, AuditMixinNullable, ImportMixin):
# Transforming time grain into an expression based on configuration
time_grain_sqla = extras.get('time_grain_sqla')
if time_grain_sqla:
db_engine_spec = self.database.db_engine_spec
if dttm_col.python_date_format == 'epoch_s':
dttm_expr = self.database.epoch_to_dttm().format(
col=dttm_expr)
dttm_expr = \
db_engine_spec.epoch_to_dttm().format(col=dttm_expr)
elif dttm_col.python_date_format == 'epoch_ms':
dttm_expr = self.database.epoch_to_dttm(ms=True).format(
col=dttm_expr)
dttm_expr = \
db_engine_spec.epoch_ms_to_dttm().format(col=dttm_expr)
udf = self.database.grains_dict().get(time_grain_sqla, '{col}')
timestamp_grain = literal_column(
udf.function.format(col=dttm_expr), type_=DateTime).label('timestamp')
@ -1434,7 +1345,7 @@ class TableColumn(Model, AuditMixinNullable, ImportMixin):
return column_to_import
def dttm_sql_literal(self, dttm):
"""Convert datetime object to string
"""Convert datetime object to a SQL expression string
If database_expression is empty, the internal dttm
will be parsed as the string with the pattern that
@ -1442,6 +1353,7 @@ class TableColumn(Model, AuditMixinNullable, ImportMixin):
If database_expression is not empty, the internal dttm
will be parsed as the sql sentence for the database to convert
"""
tf = self.python_date_format or '%Y-%m-%d %H:%M:%S.%f'
if self.database_expression:
return self.database_expression.format(dttm.strftime('%Y-%m-%d %H:%M:%S'))
@ -1450,21 +1362,9 @@ class TableColumn(Model, AuditMixinNullable, ImportMixin):
elif tf == 'epoch_ms':
return str((dttm - datetime(1970, 1, 1)).total_seconds() * 1000.0)
else:
default = "'{}'".format(dttm.strftime(tf))
iso = dttm.isoformat()
d = {
'mssql': "CONVERT(DATETIME, '{}', 126)".format(iso), # untested
'mysql': default,
'oracle':
"""TO_TIMESTAMP('{}', 'YYYY-MM-DD"T"HH24:MI:SS.ff6')""".format(
dttm.isoformat()),
'presto': default,
'sqlite': default,
}
for k, v in d.items():
if self.table.database.sqlalchemy_uri.startswith(k):
return v
return default
s = self.table.database.db_engine_spec.convert_dttm(
self.type, dttm)
return s or "'{}'".format(dttm.strftime(tf))
class DruidCluster(Model, AuditMixinNullable):

View File

@ -31,7 +31,7 @@ class CaravelException(Exception):
pass
class CaravelTimeoutException(Exception):
class CaravelTimeoutException(CaravelException):
pass
@ -39,7 +39,11 @@ class CaravelSecurityException(CaravelException):
pass
class MetricPermException(Exception):
class MetricPermException(CaravelException):
pass
class NoDataException(CaravelException):
pass

View File

@ -175,7 +175,7 @@ class BaseViz(object):
# If the datetime format is unix, the parse will use the corresponding
# parsing logic.
if df is None or df.empty:
raise Exception("No data, review your incantations!")
raise utils.NoDataException("No data.")
else:
if 'timestamp' in df.columns:
if timestamp_format in ("epoch_s", "epoch_ms"):

View File

@ -5,4 +5,4 @@ export CARAVEL_CONFIG=tests.caravel_test_config
set -e
caravel/bin/caravel version -v
export SOLO_TEST=1
nosetests tests.core_tests:CoreTests.test_slice_endpoint
nosetests tests.core_tests:CoreTests.test_slices

View File

@ -130,8 +130,7 @@ class CoreTests(CaravelTestCase):
(slc.slice_name, 'slice_url', slc.slice_url),
(slc.slice_name, 'json_endpoint', slc.viz.json_endpoint),
(slc.slice_name, 'csv_endpoint', slc.viz.csv_endpoint),
(slc.slice_name, 'slice_id_url',
"/caravel/{slc.datasource_type}/{slc.datasource_id}/{slc.id}/".format(slc=slc)),
(slc.slice_name, 'slice_id_url', slc.slice_id_url),
]
for name, method, url in urls:
print("[{name}]/[{method}]: {url}".format(**locals()))
@ -401,7 +400,6 @@ class CoreTests(CaravelTestCase):
resp = self.get_resp('/dashboardmodelview/list/')
assert "/caravel/dashboard/births/" in resp
print(self.get_resp('/caravel/dashboard/births/'))
assert 'Births' in self.get_resp('/caravel/dashboard/births/')
# Confirm that public doesn't have access to other datasets.