Merge pull request #40 from mistercrunch/time_compare

Introducing time comparison
This commit is contained in:
Maxime Beauchemin 2015-10-04 16:20:55 -07:00
commit 8eb0c0a07b
7 changed files with 112 additions and 44 deletions

View File

@ -11,12 +11,6 @@ List of TODO items for Panoramix
* csv export out of table view
* in/notin filters autocomplete
## First Class Line Charts
* Contribution to total (added to line chart already)
* Time comparison
* Time ratios
* Line types (dash, dotted)
## New Features
* Annotations layers
* Add a per-datasource permission

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python
import csv
from datetime import datetime
import gzip
import json
from subprocess import Popen
@ -8,7 +9,7 @@ from subprocess import Popen
from flask.ext.script import Manager
from flask.ext.migrate import MigrateCommand
from panoramix import db
from sqlalchemy import Column, Integer, String, Table
from sqlalchemy import Column, Integer, String, Table, DateTime
from panoramix import app
from panoramix import models
@ -59,7 +60,7 @@ def load_examples(sample):
Column("year", Integer),
Column("name", String(128)),
Column("num", Integer),
Column("ds", String(20)),
Column("ds", DateTime),
Column("gender", String(10)),
Column("sum_boys", Integer),
Column("sum_girls", Integer),
@ -78,7 +79,7 @@ def load_examples(sample):
continue
if num == "NA":
num = 0
ds = str(year) + '-01-01'
ds = datetime(int(year), 1, 1)
db.engine.execute(
BirthNames.insert(),
state=state,

View File

@ -1,6 +1,7 @@
from wtforms import (
Field, Form, SelectMultipleField, SelectField, TextField, TextAreaField,
BooleanField, IntegerField, HiddenField)
from wtforms import validators
from copy import copy
from panoramix import app
config = app.config
@ -58,7 +59,7 @@ class FormFactory(object):
description="One or many metrics to display"),
'groupby': SelectMultipleField(
'Group by',
choices=[(s, s) for s in datasource.groupby_column_names],
choices=self.choicify(datasource.groupby_column_names),
description="One or many fields to group by"),
'granularity': TextField(
'Time Granularity', default="one day",
@ -75,19 +76,24 @@ class FormFactory(object):
SelectField(
'Row limit',
default=config.get("ROW_LIMIT"),
choices=[(s, s) for s in self.row_limits]),
choices=self.choicify(self.row_limits)),
'limit':
SelectField(
'Series limit', choices=[(s, s) for s in self.series_limits],
'Series limit',
choices=self.choicify(self.series_limits),
default=50,
description=(
"Limits the number of time series that get displayed")),
'rolling_type': SelectField(
'Rolling',
default='mean',
choices=[(s, s) for s in ['mean', 'sum', 'std']],
description=(
"Defines a rolling window function to apply")),
'rolling_periods': TextField('Periods', description=(
'rolling_periods': IntegerField(
'Periods',
validators=[validators.optional()],
description=(
"Defines the size of the rolling window function, "
"relative to the 'granularity' field")),
'series': SelectField(
@ -118,7 +124,7 @@ class FormFactory(object):
description="Suffix to apply after the percentage display"),
'markup_type': SelectField(
"Markup Type",
choices=[(s, s) for s in ['markdown', 'html']],
choices=self.choicify(['markdown', 'html']),
default="markdown",
description="Pick your favorite markup language"),
'rotation': SelectField(
@ -128,9 +134,9 @@ class FormFactory(object):
description="Rotation to apply to words in the cloud"),
'line_interpolation': SelectField(
"Line Interpolation",
choices=[(s, s) for s in [
choices=self.choicify([
'linear', 'basis', 'cardinal', 'monotone',
'step-before', 'step-after']],
'step-before', 'step-after']),
default='linear',
description="Line interpolation as defined by d3.js"),
'code': TextAreaField("Code", description="Put your code here"),
@ -168,11 +174,24 @@ class FormFactory(object):
description="Compute the contribution to the total"),
'num_period_compare': IntegerField(
"Period Ratio", default=None,
validators=[validators.optional()],
description=(
"Number of period to compare against, "
"[integer] Number of period to compare against, "
"this is relative to the granularity selected")),
'time_compare': TextField(
"Time Shift Compare",
default="1 week ago",
description=(
"Overlay a timeseries from a "
"relative time period. Expects relative time delta "
"in natural language (example: 24 hours, 7 days, "
"56 weeks, 365 days")),
}
@staticmethod
def choicify(l):
    """Build a list of ``(value, label)`` choice pairs from an iterable.

    Every element of ``l`` is rendered to text with ``"{}".format`` and
    that text is used for both members of its pair. Pairs are returned
    in the same order as the input.
    """
    pairs = []
    for item in l:
        text = "{}".format(item)
        pairs.append((text, text))
    return pairs
def get_form(self, previous=False):
px_form_fields = self.field_dict
viz = self.viz
@ -200,16 +219,16 @@ class FormFactory(object):
json = HiddenField()
previous_viz_type = HiddenField()
filter_cols = datasource.filterable_column_names or ['']
for i in range(10):
setattr(QueryForm, 'flt_col_' + str(i), SelectField(
'Filter 1',
default='',
choices=[(s, s) for s in datasource.filterable_column_names]))
default=filter_cols[0],
choices=self.choicify(filter_cols)))
setattr(QueryForm, 'flt_op_' + str(i), SelectField(
'Filter 1',
default='',
choices=[(m, m) for m in ['in', 'not in']]))
default='in',
choices=self.choicify(['in', 'not in'])))
setattr(
QueryForm, 'flt_eq_' + str(i),
TextField("Super", default=''))

View File

@ -327,6 +327,7 @@ class Table(Model, Queryable, AuditMixinNullable):
filter=None,
is_timeseries=True,
timeseries_limit=15, row_limit=None,
inner_from_dttm=None, inner_to_dttm=None,
extras=None):
qry_start_dttm = datetime.now()
@ -363,10 +364,17 @@ class Table(Model, Queryable, AuditMixinNullable):
from_clause = table(self.table_name)
qry = qry.group_by(*groupby_exprs)
where_clause_and = [
time_filter = [
timestamp >= from_dttm.isoformat(),
timestamp < to_dttm.isoformat(),
timestamp <= to_dttm.isoformat(),
]
inner_time_filter = copy(time_filter)
if inner_from_dttm:
inner_time_filter[0] = timestamp >= inner_from_dttm.isoformat()
if inner_to_dttm:
inner_time_filter[1] = timestamp <= inner_to_dttm.isoformat()
where_clause_and = []
for col, op, eq in filter:
if op in ('in', 'not in'):
values = eq.split(",")
@ -376,14 +384,14 @@ class Table(Model, Queryable, AuditMixinNullable):
where_clause_and.append(cond)
if extras and 'where' in extras:
where_clause_and += [text(extras['where'])]
qry = qry.where(and_(*where_clause_and))
qry = qry.where(and_(*(time_filter + where_clause_and)))
qry = qry.order_by(desc(main_metric_expr))
qry = qry.limit(row_limit)
if timeseries_limit and groupby:
subq = select(inner_groupby_exprs)
subq = subq.select_from(table(self.table_name))
subq = subq.where(and_(*where_clause_and))
subq = subq.where(and_(*(where_clause_and + inner_time_filter)))
subq = subq.group_by(*inner_groupby_exprs)
subq = subq.order_by(desc(main_metric_expr))
subq = subq.limit(timeseries_limit)
@ -677,9 +685,13 @@ class Datasource(Model, AuditMixin, Queryable):
is_timeseries=True,
timeseries_limit=None,
row_limit=None,
inner_from_dttm=None, inner_to_dttm=None,
extras=None):
qry_start_dttm = datetime.now()
inner_from_dttm = inner_from_dttm or from_dttm
inner_to_dttm = inner_to_dttm or to_dttm
# add tzinfo to native datetime with config
from_dttm = from_dttm.replace(tzinfo=config.get("DRUID_TZ"))
to_dttm = to_dttm.replace(tzinfo=config.get("DRUID_TZ"))
@ -738,6 +750,7 @@ class Datasource(Model, AuditMixin, Queryable):
pre_qry['limit_spec'] = {
"type": "default",
"limit": timeseries_limit,
'intervals': inner_from_dttm.isoformat() + '/' + inner_to_dttm.isoformat(),
"columns": [{
"dimension": metrics[0] if metrics else self.metrics[0],
"direction": "descending",

View File

@ -50,7 +50,9 @@ function viz_nvd3(token_name, json_callback) {
chart.yAxis.tickFormat(d3.format('.3s'));
if (viz.form_data.contribution || viz.form_data.num_period_compare) {
chart.yAxis.tickFormat(d3.format('.3p'));
chart.y2Axis.tickFormat(d3.format('.3p'));
if (chart.y2Axis != undefined) {
chart.y2Axis.tickFormat(d3.format('.3p'));
}
}
} else if (viz_type === 'dist_bar') {

View File

@ -98,6 +98,9 @@
data-toggle="modal" data-target="#query_modal">query</span>
</h3>
<hr/>
{% block messages %}
{% endblock %}
{% include 'appbuilder/flash.html' %}
<div class="viz" style="height: 700px;">
{% block viz_html %}
{% if viz.error_msg %}

View File

@ -1,4 +1,5 @@
from collections import OrderedDict, defaultdict
from copy import copy
from datetime import datetime
import json
import uuid
@ -40,7 +41,12 @@ class BaseViz(object):
form = form_class(form_data)
else:
form = form_class(**form_data)
data = form.data.copy()
if not form.validate():
for k, v in form.errors.items():
if not data.get('json') and not data.get('async'):
flash("{}: {}".format(k, " ".join(v)), 'danger')
previous_viz_type = form_data.get('previous_viz_type')
if previous_viz_type in viz_types and previous_viz_type != self.viz_type:
data = {
@ -85,11 +91,14 @@ class BaseViz(object):
'{self.datasource.id}/'.format(**locals()))
return href(d)
def get_df(self):
def get_df(self, query_obj=None):
if not query_obj:
query_obj = self.query_obj()
self.error_msg = ""
self.results = None
self.results = self.bake_query()
self.results = self.datasource.query(**query_obj)
df = self.results.df
if df is None or df.empty:
raise Exception("No data, review your incantations!")
@ -118,9 +127,6 @@ class BaseViz(object):
filters.append((col, op, eq))
return filters
def bake_query(self):
    """Run this viz's query object against its datasource.

    Calls ``self.query_obj()`` and passes the resulting mapping as keyword
    arguments to ``self.datasource.query``, returning whatever that call
    returns.
    """
    run_query = self.datasource.query
    query_kwargs = self.query_obj()
    return run_query(**query_kwargs)
def query_obj(self):
"""
Building a query object
@ -260,7 +266,10 @@ class NVD3Viz(BaseViz):
'nv.d3.min.js',
'widgets/viz_nvd3.js',
]
css_files = ['nv.d3.css']
css_files = [
'nv.d3.css',
'widgets/viz_nvd3.css',
]
class BubbleViz(NVD3Viz):
@ -387,34 +396,36 @@ class NVD3TimeSeriesViz(NVD3Viz):
'metrics',
'groupby', 'limit',
('rolling_type', 'rolling_periods'),
('num_period_compare', 'line_interpolation'),
('time_compare', 'num_period_compare'),
('line_interpolation', None),
('show_brush', 'show_legend'),
('rich_tooltip', 'y_axis_zero'),
('y_log_scale', 'contribution')
('y_log_scale', 'contribution'),
]
def get_df(self):
def get_df(self, query_obj=None):
form_data = self.form_data
df = super(NVD3TimeSeriesViz, self).get_df()
df = super(NVD3TimeSeriesViz, self).get_df(query_obj)
df = df.fillna(0)
if form_data.get("granularity") == "all":
raise Exception("Pick a time granularity for your time series")
df = df.pivot_table(
index="timestamp",
columns=self.form_data.get('groupby'),
values=self.form_data.get('metrics'))
columns=form_data.get('groupby'),
values=form_data.get('metrics'))
if self.sort_series:
dfs = df.sum()
dfs.sort(ascending=False)
df = df[dfs.index]
if self.form_data.get("contribution") == "y":
if form_data.get("contribution"):
dft = df.T
df = (dft / dft.sum()).T
num_period_compare = self.form_data.get("num_period_compare")
num_period_compare = form_data.get("num_period_compare")
if num_period_compare:
num_period_compare = int(num_period_compare)
df = df / df.shift(num_period_compare)
@ -431,8 +442,7 @@ class NVD3TimeSeriesViz(NVD3Viz):
df = pd.rolling_sum(df, int(rolling_periods))
return df
def get_json_data(self):
df = self.get_df()
def to_series(self, df, classed='', title_suffix=''):
series = df.to_dict('series')
chart_data = []
for name in df.T.index.tolist():
@ -448,14 +458,40 @@ class NVD3TimeSeriesViz(NVD3Viz):
series_title = ", ".join(name)
else:
series_title = ", ".join(name[1:])
color = utils.color(series_title)
if title_suffix:
series_title += title_suffix
d = {
"key": series_title,
"color": utils.color(series_title),
"color": color,
"classed": classed,
"values": [
{'x': ds, 'y': ys[i]}
for i, ds in enumerate(df.timestamp)]
}
chart_data.append(d)
return chart_data
def get_json_data(self):
df = self.get_df()
chart_data = self.to_series(df)
time_compare = self.form_data.get('time_compare')
if time_compare:
query_object = self.query_obj()
delta = utils.parse_human_timedelta(time_compare)
query_object['inner_from_dttm'] = query_object['from_dttm']
query_object['inner_to_dttm'] = query_object['to_dttm']
query_object['from_dttm'] -= delta
query_object['to_dttm'] -= delta
df2 = self.get_df(query_object)
df2.index += delta
chart_data += self.to_series(
df2, classed='dashed', title_suffix="---")
chart_data = sorted(chart_data, key=lambda x: x['key'])
data = {
'chart_data': chart_data,
'query': self.results.query,