Checkpoint

This commit is contained in:
Maxime 2015-07-06 15:56:41 +00:00
parent 9f1204605f
commit 059c02aed0
3 changed files with 392 additions and 138 deletions

342
app.py
View File

@ -1,79 +1,61 @@
from pydruid import client from pydruid import client
from pydruid.utils.filters import Dimension from pydruid.utils.filters import Dimension, Filter
from dateutil.parser import parse from dateutil.parser import parse
from datetime import datetime, timedelta from datetime import datetime, timedelta
from flask import Flask, render_template, request, flash from flask import Flask, render_template, request, flash
from flask_bootstrap import Bootstrap from flask_bootstrap import Bootstrap
import json
from wtforms import Form, SelectMultipleField, SelectField, TextField from wtforms import Form, SelectMultipleField, SelectField, TextField
import pandas as pd import pandas as pd
from pandas_highcharts.core import serialize from pandas_highcharts.core import serialize
pd.set_option('display.max_colwidth', -1) pd.set_option('display.max_colwidth', -1)
since_l = {
ROW_LIMIT = 10000 '1hour': timedelta(hours=1),
PORT = 8088 '1day': timedelta(days=1),
CHART_ARGS = { '7days': timedelta(days=7),
'figsize': (None, 700), '28days': timedelta(days=28),
'title': None, 'all': timedelta(days=365*100)
}
query = client.PyDruid("http://10.181.47.80:8080", 'druid/v2')
app = Flask(__name__)
Bootstrap(app)
class BaseViz(object):
template = "panoramix/datasource.html"
def __init__(self):
pass
def form_class(self):
pass
viz_types = {
'table': 'Table',
'line': 'Time Series - Line',
'bar': 'Time Series - Bar',
'bar_distro': 'Distribution - Bar',
} }
def latest_metadata(datasource): metric = "count"
max_time = query.time_boundary(datasource=datasource)[0]['result']['maxTime']
max_time = parse(max_time)
intervals = (max_time - timedelta(seconds=1)).isoformat() + '/'
intervals += max_time.isoformat()
return query.segment_metadata(
datasource=datasource,
intervals=intervals)[-1]['columns']
@app.route("/datasource/<datasource>/")
def datasource(datasource):
metadata = latest_metadata(datasource) class DruidDataSource(object):
def __init__(self, name):
self.name = name
self.cols = self.latest_metadata()
self.col_names = sorted([col for col in self.cols.keys()])
def latest_metadata(self):
max_time = query.time_boundary(
datasource=self.name)[0]['result']['maxTime']
max_time = parse(max_time)
intervals = (max_time - timedelta(seconds=1)).isoformat() + '/'
intervals += max_time.isoformat()
return query.segment_metadata(
datasource=self.name,
intervals=intervals)[-1]['columns']
def form_factory(datasource, form_args=None):
grain = ['all', 'none', 'minute', 'hour', 'day'] grain = ['all', 'none', 'minute', 'hour', 'day']
since_l = {
'1hour': timedelta(hours=1),
'1day': timedelta(days=1),
'7days': timedelta(days=7),
'28days': timedelta(days=28),
'all': timedelta(days=365*100)
}
limits = [0, 5, 10, 25, 50, 100, 500] limits = [0, 5, 10, 25, 50, 100, 500]
limit = request.args.get("limit")
try: if form_args:
limit = int(limit) limit = form_args.get("limit")
if limit not in limits: try:
limits.append(limit) limit = int(limit)
limits = sorted(limits) if limit not in limits:
except: limits.append(limit)
pass limits = sorted(limits)
except:
pass
class QueryForm(Form): class QueryForm(Form):
viz_type = SelectField( viz_type = SelectField(
'Viz', choices=[v for v in viz_types.items()]) 'Viz', choices=[(k, v.verbose_name) for k, v in viz_types.items()])
groupby = SelectMultipleField( groupby = SelectMultipleField(
'Group by', choices=[(m, m) for m in sorted(metadata.keys())]) 'Group by', choices=[(m, m) for m in datasource.col_names])
granularity = SelectField( granularity = SelectField(
'Granularity', choices=[(g, g) for g in grain]) 'Granularity', choices=[(g, g) for g in grain])
since = SelectField( since = SelectField(
@ -81,108 +63,192 @@ def datasource(datasource):
limit = SelectField( limit = SelectField(
'Limit', choices=[(s, s) for s in limits]) 'Limit', choices=[(s, s) for s in limits])
flt_col_1 = SelectField( flt_col_1 = SelectField(
'Filter 1', choices=[(m, m) for m in sorted(metadata.keys())]) 'Filter 1', choices=[(m, m) for m in datasource.col_names])
flt_op_1 = SelectField( flt_op_1 = SelectField(
'Filter 1', choices=[(m, m) for m in ['==', 'in', '<', '>']]) 'Filter 1', choices=[(m, m) for m in ['==', '!=', 'in',]])
flt_eq_1 = TextField("Super") flt_eq_1 = TextField("Super")
return QueryForm
groupby = request.args.getlist("groupby") or []
granularity = request.args.get("granularity")
metric = "count"
limit = int(request.args.get("limit", ROW_LIMIT)) or ROW_LIMIT
since = request.args.get("since", "all")
from_dttm = (datetime.now() - since_l[since]).isoformat()
# Building filters class BaseViz(object):
i = 1 verbose_name = "Base Viz"
filters = [] template = "panoramix/datasource.html"
while True: def __init__(self, datasource, form_class, form_data):
col = request.args.get("flt_col_" + str(i)) self.datasource = datasource
op = request.args.get("flt_op_" + str(i)) self.form_class = form_class
eq = request.args.get("flt_eq_" + str(i)) self.form_data = form_data
print (col,op,eq) self.df = self.bake_query()
if col and op and eq: if self.df is not None:
filters.append(Dimension(col)==eq) self.df.timestamp = pd.to_datetime(self.df.timestamp)
filters = Dimension(col)==eq self.df_prep()
self.form_prep()
def bake_query(self):
ds = self.datasource
args = self.form_data
groupby = args.getlist("groupby") or []
granularity = args.get("granularity")
metric = "count"
limit = int(args.get("limit", ROW_LIMIT)) or ROW_LIMIT
since = args.get("since", "all")
from_dttm = (datetime.now() - since_l[since]).isoformat()
# Building filters
i = 1
filters = None
while True:
col = args.get("flt_col_" + str(i))
op = args.get("flt_op_" + str(i))
eq = args.get("flt_eq_" + str(i))
if col and op and eq:
cond = None
if op == '==':
cond = Dimension(col)==eq
elif op == '!=':
cond = ~(Dimension(col)==eq)
elif op == 'in':
fields = []
for s in eq.split(','):
s = s.strip()
fields.append(Filter.build_filter(Dimension(col)==s))
cond = Filter(type="or", fields=fields)
if filters:
filters = cond and filters
else:
filters = cond
else:
break
i += 1
kw = {}
if filters:
kw['filter'] = filters
query.groupby(
datasource=ds.name,
granularity=granularity or 'all',
intervals=from_dttm + '/' + datetime.now().isoformat(),
dimensions=groupby,
aggregations={"count": client.doublesum(metric)},
#filter=filters,
limit_spec={
"type": "default",
"limit": limit,
"columns": [{
"dimension" : metric,
"direction" : "descending",
},],
},
**kw
)
return query.export_pandas()
def df_prep(self, ):
pass
def form_prep(self):
pass
def render(self, *args, **kwargs):
form = self.form_class(self.form_data)
return render_template(
self.template, form=form)
class TableViz(BaseViz):
verbose_name = "Table View"
template = 'panoramix/viz_table.html'
def render(self):
form = self.form_class(self.form_data)
if self.df is None or self.df.empty:
flash("No data.", "error")
table = None
else: else:
break if self.form_data.get("granularity") == "all":
i += 1 del self.df['timestamp']
table = self.df.to_html(
classes=["table", "table-striped", 'table-bordered'],
index=False)
return render_template(
self.template, form=form, table=table)
results=[]
results = query.groupby(
datasource=datasource,
granularity=granularity or 'all',
intervals=from_dttm + '/' + datetime.now().isoformat(),
dimensions=groupby,
aggregations={"count": client.doublesum(metric)},
#filter=filters,
limit_spec={
"type": "default",
"limit": limit,
"columns": [{
"dimension" : metric,
"direction" : "descending",
},],
},
)
viz_type = request.args.get("viz_type", "table") class HighchartsViz(BaseViz):
verbose_name = "Base Highcharts Viz"
chart_js = None
table = None
df = query.export_pandas()
template = 'panoramix/viz_highcharts.html' template = 'panoramix/viz_highcharts.html'
if df is None or df.empty: chart_kind = 'line'
flash("No data", "error") def render(self, *args, **kwargs):
elif viz_type == "table": form = self.form_class(self.form_data)
template = 'panoramix/viz_table.html' if self.df is None or self.df.empty:
df = df.sort(df.columns[0], ascending=False) flash("No data.", "error")
if granularity == 'all': else:
del df['timestamp'] table = self.df.to_html(
classes=["table", "table-striped", 'table-bordered'],
index=False)
return render_template(
self.template, form=form, table=table,
*args, **kwargs)
table = df.to_html(
classes=["table", "table-striped", 'table-bordered'], index=False) class TimeSeriesViz(HighchartsViz):
elif viz_type == "line": verbose_name = "Time Series - Line Chart"
chart_kind = "line"
def render(self):
df = self.df
df = df.pivot_table( df = df.pivot_table(
index="timestamp", index="timestamp",
columns=[ columns=[
col for col in df.columns if col not in ["timestamp", metric]], col for col in df.columns if col not in ["timestamp", metric]],
values=[metric]) values=[metric])
chart_js = serialize( chart_js = serialize(
df, render_to="chart", kind="line", **CHART_ARGS) df, kind=self.chart_kind, **CHART_ARGS)
elif viz_type == "bar": return super(TimeSeriesViz, self).render(chart_js=chart_js)
df = df.pivot_table(
index="timestamp",
columns=[ class TimeSeriesAreaViz(TimeSeriesViz):
col for col in df.columns if col not in ["timestamp", metric]], verbose_name = "Time Series - Area Chart"
values=[metric]) chart_kind = "area"
chart_js = serialize(df, render_to="chart", kind="bar", **CHART_ARGS)
elif viz_type == "bar_distro":
class DistributionBarViz(HighchartsViz):
verbose_name = "Distribution - Bar Chart"
chart_kind = "bar"
def render(self):
df = self.df
df = df.pivot_table( df = df.pivot_table(
index=[ index=[
col for col in df.columns if col not in ["timestamp", metric]], col for col in df.columns if col not in ['timestamp', metric]],
values=[metric]) values=[metric])
df = df.sort(metric, ascending=False) df = df.sort(metric, ascending=False)
chart_js = serialize(df, render_to="chart", kind="bar", **CHART_ARGS) chart_js = serialize(
df, kind=self.chart_kind, **CHART_ARGS)
return super(DistributionBarViz, self).render(chart_js=chart_js)
viz_types = {
'table': TableViz,
'line': TimeSeriesViz,
'area': TimeSeriesAreaViz,
'dist_bar': DistributionBarViz,
}
@app.route("/datasource/<name>/")
def datasource(name):
viz_type = request.args.get("viz_type", "table")
datasource = DruidDataSource(name)
viz = viz_types[viz_type](
datasource,
form_class=form_factory(datasource, request.args),
form_data=request.args)
return viz.render()
return render_template(
template,
table=table,
verbose_viz_type=viz_types[viz_type],
viz_type=viz_type,
datasource=datasource,
chart_js=chart_js,
latest_metadata=json.dumps(
metadata,
sort_keys=True,
indent=2),
results=json.dumps(
results,
sort_keys=True,
indent=2),
form=QueryForm(request.args, id="queryform"),
)
if __name__ == '__main__': if __name__ == '__main__':
app = Flask(__name__)
app.secret_key = "monkeys"
Bootstrap(app)
app.debug = True app.debug = True
app.run(host='0.0.0.0', port=PORT) app.run(host='0.0.0.0', port=PORT)

11
settings.py Normal file
View File

@ -0,0 +1,11 @@
# Row cap for queries.
# presumably the default/fallback `limit` used by the query code — verify against callers
ROW_LIMIT = 10000
# Host, port and base endpoint combined by get_pydruid_client() into the
# Druid client URL ("http://<DRUID_HOST>:<DRUID_PORT>" + DRUID_BASE_ENDPOINT).
DRUID_HOST = '10.181.47.80'
# NOTE(review): the previous hard-coded client URL in app.py used port 8080,
# while 8088 was the Flask PORT — confirm 8088 is really the Druid port.
DRUID_PORT = 8088
DRUID_BASE_ENDPOINT = 'druid/v2'
def get_pydruid_client():
    """Build a PyDruid client for the Druid endpoint configured in this module.

    The base URL is ``http://DRUID_HOST:DRUID_PORT`` and the endpoint is
    ``DRUID_BASE_ENDPOINT``.

    Returns:
        The newly created ``pydruid.client.PyDruid`` instance. (The previous
        version built the client but never returned it, so callers always
        received ``None``.)
    """
    # Imported locally: this module has no top-level pydruid import, so the
    # bare name `client` would otherwise raise NameError at call time.
    from pydruid import client

    return client.PyDruid(
        "http://{0}:{1}".format(DRUID_HOST, DRUID_PORT),
        DRUID_BASE_ENDPOINT)

177
viz.py Normal file
View File

@ -0,0 +1,177 @@
from pydruid import client
from pydruid.utils.filters import Dimension, Filter
from datetime import datetime
from flask import render_template, flash
import pandas as pd
from pandas_highcharts.core import serialize
# Keyword arguments shared by every `serialize(df, kind=..., **CHART_ARGS)`
# call made by the Highcharts-based visualizations in this module.
CHART_ARGS = {
'figsize': (None, 700),
'title': None,
# presumably the id of the DOM element the chart is drawn into — TODO confirm
# against the viz_highcharts.html template
'render_to': 'chart',
}
class BaseViz(object):
    """Base class for visualizations backed by a Druid groupby query.

    Constructing an instance immediately runs the query described by
    `form_data` against `datasource` and stores the resulting DataFrame
    (or ``None``) on ``self.df``. Subclasses override `render` (and
    optionally the `df_prep` / `form_prep` hooks) to produce a page.

    NOTE(review): `ROW_LIMIT`, `since_l` and `query` are used by
    `bake_query` but are not defined or imported in the visible part of
    this module — confirm they are brought into scope.
    """

    verbose_name = "Base Viz"
    template = "panoramix/datasource.html"

    def __init__(self, datasource, form_class, form_data):
        """Store the inputs, run the query and invoke the prep hooks.

        Args:
            datasource: object exposing a `name` attribute (the Druid
                datasource name passed to the groupby query).
            form_class: form class instantiated with `form_data` at render
                time.
            form_data: request arguments; must support `get` and `getlist`.
        """
        self.datasource = datasource
        self.form_class = form_class
        self.form_data = form_data
        self.df = self.bake_query()
        if self.df is not None:
            self.df.timestamp = pd.to_datetime(self.df.timestamp)
            # NOTE(review): the hooks run only when the query returned data;
            # both are no-ops in this class — confirm whether form_prep
            # should also run for empty results.
            self.df_prep()
            self.form_prep()

    @staticmethod
    def _filter_condition(col, op, eq):
        """Translate one (column, operator, value) triple into a filter.

        Returns ``None`` when `op` is not one of ``'=='``, ``'!='``, ``'in'``.
        """
        if op == '==':
            return Dimension(col) == eq
        if op == '!=':
            return ~(Dimension(col) == eq)
        if op == 'in':
            # "in" is expressed as an OR of equality filters, one per
            # comma-separated value.
            fields = [
                Filter.build_filter(Dimension(col) == value.strip())
                for value in eq.split(',')]
            return Filter(type="or", fields=fields)
        return None

    def _build_filters(self):
        """Combine the numbered ``flt_*_<i>`` form fields into one filter.

        Fields are read for i = 1, 2, ... until a column, operator or value
        is missing. Returns ``None`` when no usable filter was supplied.
        """
        args = self.form_data
        filters = None
        i = 1
        while True:
            col = args.get("flt_col_" + str(i))
            op = args.get("flt_op_" + str(i))
            eq = args.get("flt_eq_" + str(i))
            if not (col and op and eq):
                break
            cond = self._filter_condition(col, op, eq)
            # An unrecognised operator yields no condition; skip it rather
            # than discarding the filters accumulated so far.
            if cond is not None:
                if filters is None:
                    filters = cond
                else:
                    # Build a real Druid "and" filter. The Python `and`
                    # keyword used previously just returned one operand and
                    # silently dropped the other condition.
                    filters = Filter(type="and", fields=[
                        Filter.build_filter(filters),
                        Filter.build_filter(cond),
                    ])
            i += 1
        return filters

    def bake_query(self):
        """Run the Druid groupby query described by the form data.

        Reads `groupby`, `granularity`, `limit`, `since` and the numbered
        filter fields from ``self.form_data``, issues ``query.groupby`` and
        returns ``query.export_pandas()``.

        Raises:
            KeyError: if `since` is not a key of `since_l`.
            ValueError: if `limit` cannot be converted to an int.
        """
        ds = self.datasource
        args = self.form_data
        groupby = args.getlist("groupby") or []
        granularity = args.get("granularity")
        metric = "count"
        # A limit of 0 means "no explicit limit": fall back to ROW_LIMIT.
        limit = int(args.get("limit", ROW_LIMIT)) or ROW_LIMIT
        since = args.get("since", "all")
        # Take a single timestamp so both ends of the interval agree.
        now = datetime.now()
        from_dttm = (now - since_l[since]).isoformat()

        kw = {}
        filters = self._build_filters()
        if filters is not None:
            kw['filter'] = filters

        query.groupby(
            datasource=ds.name,
            granularity=granularity or 'all',
            intervals=from_dttm + '/' + now.isoformat(),
            dimensions=groupby,
            aggregations={"count": client.doublesum(metric)},
            limit_spec={
                "type": "default",
                "limit": limit,
                "columns": [{
                    "dimension": metric,
                    "direction": "descending",
                }],
            },
            **kw
        )
        return query.export_pandas()

    def df_prep(self):
        """Hook for subclasses to post-process ``self.df``; no-op here."""
        pass

    def form_prep(self):
        """Hook for subclasses to adjust the form; no-op here."""
        pass

    def render(self, *args, **kwargs):
        """Render ``self.template`` with the bound form.

        Extra positional and keyword arguments are accepted but ignored.
        """
        form = self.form_class(self.form_data)
        return render_template(
            self.template, form=form)
class TableViz(BaseViz):
    """Visualization that renders the query result as an HTML table."""

    verbose_name = "Table View"
    template = 'panoramix/viz_table.html'

    def render(self):
        """Render the table template.

        Flashes "No data." and passes ``table=None`` to the template when
        the query returned nothing. When the granularity is "all" the
        `timestamp` column is left out of the table.
        """
        form = self.form_class(self.form_data)
        if self.df is None or self.df.empty:
            flash("No data.", "error")
            table = None
        else:
            df = self.df
            if self.form_data.get("granularity") == "all":
                # Drop on a copy instead of `del self.df[...]`: mutating the
                # stored frame made a second render() raise KeyError.
                df = df.drop('timestamp', axis=1)
            table = df.to_html(
                classes=["table", "table-striped", 'table-bordered'],
                index=False)
        return render_template(
            self.template, form=form, table=table)
class HighchartsViz(BaseViz):
    """Base class for chart visualizations using the Highcharts template.

    Subclasses set `chart_kind` and call this `render` with the serialized
    chart passed as a keyword argument.
    """

    verbose_name = "Base Highcharts Viz"
    template = 'panoramix/viz_highcharts.html'
    chart_kind = 'line'

    def render(self, *args, **kwargs):
        """Render the Highcharts template with the form and an HTML table.

        Flashes "No data." when the query returned nothing. Extra
        positional and keyword arguments are forwarded to
        `render_template`.
        """
        form = self.form_class(self.form_data)
        # Initialise up front: previously `table` was unbound on the no-data
        # branch and the render_template call raised UnboundLocalError.
        table = None
        if self.df is None or self.df.empty:
            flash("No data.", "error")
        else:
            table = self.df.to_html(
                classes=["table", "table-striped", 'table-bordered'],
                index=False)
        return render_template(
            self.template, form=form, table=table,
            *args, **kwargs)
class TimeSeriesViz(HighchartsViz):
    """Line chart of the metric over time, one series per dimension value."""

    verbose_name = "Time Series - Line Chart"
    chart_kind = "line"

    def render(self):
        """Pivot the result on `timestamp` and render it as a chart.

        Every column other than `timestamp` and the metric becomes a pivot
        column; the pivoted frame is serialized with `chart_kind` and handed
        to the parent `render` as `chart_js`.
        """
        # The metric column name was previously an undefined module-level
        # name here (NameError); "count" matches the aggregation key built
        # in BaseViz.bake_query.
        metric = "count"
        df = self.df
        if df is None or df.empty:
            # Nothing to pivot: let the parent report "No data." instead of
            # failing on `None.pivot_table`.
            # NOTE(review): confirm the template tolerates chart_js=None.
            return super(TimeSeriesViz, self).render(chart_js=None)
        df = df.pivot_table(
            index="timestamp",
            columns=[
                col for col in df.columns
                if col not in ["timestamp", metric]],
            values=[metric])
        chart_js = serialize(
            df, kind=self.chart_kind, **CHART_ARGS)
        return super(TimeSeriesViz, self).render(chart_js=chart_js)
class TimeSeriesAreaViz(TimeSeriesViz):
    """Time-series visualization drawn with the 'area' chart kind.

    Inherits all rendering from TimeSeriesViz; only the label and the
    chart kind differ.
    """

    chart_kind = "area"
    verbose_name = "Time Series - Area Chart"
class DistributionBarViz(HighchartsViz):
    """Bar chart of the metric per dimension value, largest first."""

    verbose_name = "Distribution - Bar Chart"
    chart_kind = "bar"

    def render(self):
        """Pivot the result by its dimension columns and render a bar chart.

        Every column other than `timestamp` and the metric becomes part of
        the pivot index; rows are sorted by the metric in descending order,
        serialized with `chart_kind` and handed to the parent `render` as
        `chart_js`.
        """
        # The metric column name was previously an undefined module-level
        # name here (NameError); "count" matches the aggregation key built
        # in BaseViz.bake_query.
        metric = "count"
        df = self.df
        if df is None or df.empty:
            # Nothing to pivot: let the parent report "No data." instead of
            # failing on `None.pivot_table`.
            # NOTE(review): confirm the template tolerates chart_js=None.
            return super(DistributionBarViz, self).render(chart_js=None)
        df = df.pivot_table(
            index=[
                col for col in df.columns
                if col not in ['timestamp', metric]],
            values=[metric])
        # DataFrame.sort was replaced by sort_values in later pandas
        # releases; prefer the new name and fall back for old installs.
        sort_by = getattr(df, "sort_values", None) or df.sort
        df = sort_by(metric, ascending=False)
        chart_js = serialize(
            df, kind=self.chart_kind, **CHART_ARGS)
        return super(DistributionBarViz, self).render(chart_js=chart_js)
# Registry mapping a visualization key to the class implementing it.
# Each class carries a `verbose_name` used as its human-readable label.
# presumably the keys are the accepted values of the `viz_type` request
# argument — verify against the route handler in app.py
viz_types = {
'table': TableViz,
'line': TimeSeriesViz,
'area': TimeSeriesAreaViz,
'dist_bar': DistributionBarViz,
}