From 2e6b4b121fe2a1cf6d23afc153e7383eb01302cb Mon Sep 17 00:00:00 2001 From: Ryan Ye Date: Wed, 14 Sep 2016 02:58:47 +0800 Subject: [PATCH] Time grain support for unix-timestamp columns (#1093) * Add time grain support for time columns in unix timestamp * Fix datetime parsing for unix epoch Since we've already converted unix epoch to datetime type, we shouldn't specify 'unit' parameter in pandas.to_datetime * Fix SQLite timestamp to datetime conversion --- caravel/models.py | 22 ++++++++++++++++++++++ caravel/viz.py | 7 ++----- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/caravel/models.py b/caravel/models.py index b8b550612..105de8949 100644 --- a/caravel/models.py +++ b/caravel/models.py @@ -591,6 +591,22 @@ class Database(Model, AuditMixinNullable): def grains_dict(self): return {grain.name: grain for grain in self.grains()} + def epoch_to_dttm(self, ms=False): + """Database-specific SQL to convert unix timestamp to datetime + """ + ts2date_exprs = { + 'sqlite': "datetime({col}, 'unixepoch')", + 'postgresql': "(timestamp 'epoch' + {col} * interval '1 second')", + 'mysql': "from_unixtime({col})", + 'mssql': "dateadd(S, {col}, '1970-01-01')" + } + ts2date_exprs['redshift'] = ts2date_exprs['postgresql'] + ts2date_exprs['vertica'] = ts2date_exprs['postgresql'] + for db_type, expr in ts2date_exprs.items(): + if self.sqlalchemy_uri.startswith(db_type): + return expr.replace('{col}', '({col}/1000.0)') if ms else expr + raise Exception(_("Unable to convert unix epoch to datetime")) + def get_extra(self): extra = {} if self.extra: @@ -795,6 +811,12 @@ class SqlaTable(Model, Queryable, AuditMixinNullable): # Transforming time grain into an expression based on configuration time_grain_sqla = extras.get('time_grain_sqla') if time_grain_sqla: + if dttm_col.python_date_format == 'epoch_s': + dttm_expr = self.database.epoch_to_dttm().format( + col=dttm_expr) + elif dttm_col.python_date_format == 'epoch_ms': + dttm_expr = 
self.database.epoch_to_dttm(ms=True).format( + col=dttm_expr) udf = self.database.grains_dict().get(time_grain_sqla, '{col}') timestamp_grain = literal_column( udf.function.format(col=dttm_expr)).label('timestamp') diff --git a/caravel/viz.py b/caravel/viz.py index 61f3d5d75..181c87efb 100755 --- a/caravel/viz.py +++ b/caravel/viz.py @@ -172,12 +172,9 @@ class BaseViz(object): raise Exception("No data, review your incantations!") else: if 'timestamp' in df.columns: - if timestamp_format == "epoch_s": + if timestamp_format in ("epoch_s", "epoch_ms"): df.timestamp = pd.to_datetime( - df.timestamp, utc=False, unit="s") - elif timestamp_format == "epoch_ms": - df.timestamp = pd.to_datetime( - df.timestamp, utc=False, unit="ms") + df.timestamp, utc=False) else: df.timestamp = pd.to_datetime( df.timestamp, utc=False, format=timestamp_format)