[fix] Fix table viz column order (#9122)

This commit is contained in:
John Bodley 2020-02-16 22:51:35 -08:00 committed by GitHub
parent a7e433a512
commit 9f7466ef90
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 79 additions and 71 deletions

View File

@ -30,7 +30,6 @@ import re
import uuid
from collections import defaultdict, OrderedDict
from datetime import datetime, timedelta
from functools import reduce
from itertools import product
from typing import Any, Dict, List, Optional, Set, Tuple, TYPE_CHECKING
@ -532,11 +531,13 @@ class TableViz(BaseViz):
d = super().query_obj()
fd = self.form_data
if fd.get("all_columns") and (fd.get("groupby") or fd.get("metrics")):
if fd.get("all_columns") and (
fd.get("groupby") or fd.get("metrics") or fd.get("percent_metrics")
):
raise Exception(
_(
"Choose either fields to [Group By] and [Metrics] or "
"[Columns], not both"
"Choose either fields to [Group By] and [Metrics] and/or "
"[Percentage Metrics], or [Columns], not both"
)
)
@ -554,46 +555,50 @@ class TableViz(BaseViz):
# Add all percent metrics that are not already in the list
if "percent_metrics" in fd:
d["metrics"] = d["metrics"] + list(
filter(lambda m: m not in d["metrics"], fd["percent_metrics"] or [])
d["metrics"].extend(
m for m in fd["percent_metrics"] or [] if m not in d["metrics"]
)
d["is_timeseries"] = self.should_be_timeseries()
return d
def get_data(self, df: pd.DataFrame) -> VizData:
fd = self.form_data
if not self.should_be_timeseries() and df is not None and DTTM_ALIAS in df:
"""
Transform the query result to the table representation.
:param df: The interim dataframe
:returns: The table visualization data
The interim dataframe comprises of the group-by and non-group-by columns and
the union of the metrics representing the non-percent and percent metrics. Note
the percent metrics have yet to be transformed.
"""
if not self.should_be_timeseries() and DTTM_ALIAS in df:
del df[DTTM_ALIAS]
# Sum up and compute percentages for all percent metrics
percent_metrics = fd.get("percent_metrics") or []
percent_metrics = [utils.get_metric_name(m) for m in percent_metrics]
# Transform the data frame to adhere to the UI ordering of the columns and
# metrics whilst simultaneously computing the percentages (via normalization)
# for the percent metrics.
non_percent_metric_columns = (
self.form_data.get("all_columns") or self.form_data.get("groupby") or []
) + utils.get_metric_names(self.form_data.get("metrics") or [])
if len(percent_metrics):
percent_metrics = list(filter(lambda m: m in df, percent_metrics))
metric_sums = {
m: reduce(lambda a, b: a + b, df[m]) for m in percent_metrics
}
metric_percents = {
m: list(
map(
lambda a: None if metric_sums[m] == 0 else a / metric_sums[m],
df[m],
)
)
for m in percent_metrics
}
for m in percent_metrics:
m_name = "%" + m
df[m_name] = pd.Series(metric_percents[m], name=m_name)
# Remove metrics that are not in the main metrics list
metrics = fd.get("metrics") or []
metrics = [utils.get_metric_name(m) for m in metrics]
for m in filter(
lambda m: m not in metrics and m in df.columns, percent_metrics
):
del df[m]
percent_metric_columns = utils.get_metric_names(
self.form_data.get("percent_metrics") or []
)
df = pd.concat(
[
df[non_percent_metric_columns],
(
df[percent_metric_columns]
.div(df[percent_metric_columns].sum())
.add_prefix("%")
),
],
axis=1,
)
data = self.handle_js_int_overflow(
dict(records=df.to_dict(orient="records"), columns=list(df.columns))

View File

@ -90,12 +90,12 @@ GB_RESULT_SET = [
{
"version": "v1",
"timestamp": "2012-01-01T00:00:00.000Z",
"event": {"dim1": "Canada", "dim2": "boy", "metric1": 12345678},
"event": {"dim1": "Canada", "dim2": "boy", "count": 12345678},
},
{
"version": "v1",
"timestamp": "2012-01-01T00:00:00.000Z",
"event": {"dim1": "USA", "dim2": "girl", "metric1": 12345678 / 2},
"event": {"dim1": "USA", "dim2": "girl", "count": 12345678 / 2},
},
]

View File

@ -169,15 +169,7 @@ class BaseVizTestCase(SupersetTestCase):
class TableVizTestCase(SupersetTestCase):
def test_get_data_applies_percentage(self):
form_data = {
"percent_metrics": [
{
"expressionType": "SIMPLE",
"aggregate": "SUM",
"label": "SUM(value1)",
"column": {"column_name": "value1", "type": "DOUBLE"},
},
"avg__B",
],
"groupby": ["groupA", "groupB"],
"metrics": [
{
"expressionType": "SIMPLE",
@ -188,39 +180,50 @@ class TableVizTestCase(SupersetTestCase):
"count",
"avg__C",
],
"percent_metrics": [
{
"expressionType": "SIMPLE",
"aggregate": "SUM",
"label": "SUM(value1)",
"column": {"column_name": "value1", "type": "DOUBLE"},
},
"avg__B",
],
}
datasource = self.get_datasource_mock()
raw = {}
raw["SUM(value1)"] = [15, 20, 25, 40]
raw["avg__B"] = [10, 20, 5, 15]
raw["avg__C"] = [11, 22, 33, 44]
raw["count"] = [6, 7, 8, 9]
raw["groupA"] = ["A", "B", "C", "C"]
raw["groupB"] = ["x", "x", "y", "z"]
df = pd.DataFrame(raw)
df = pd.DataFrame(
{
"SUM(value1)": [15, 20, 25, 40],
"avg__B": [10, 20, 5, 15],
"avg__C": [11, 22, 33, 44],
"count": [6, 7, 8, 9],
"groupA": ["A", "B", "C", "C"],
"groupB": ["x", "x", "y", "z"],
}
)
test_viz = viz.TableViz(datasource, form_data)
data = test_viz.get_data(df)
# Check method correctly transforms data and computes percents
self.assertEqual(
set(
[
"groupA",
"groupB",
"count",
"SUM(value1)",
"avg__C",
"%SUM(value1)",
"%avg__B",
]
),
set(data["columns"]),
[
"groupA",
"groupB",
"SUM(value1)",
"count",
"avg__C",
"%SUM(value1)",
"%avg__B",
],
list(data["columns"]),
)
expected = [
{
"groupA": "A",
"groupB": "x",
"count": 6,
"SUM(value1)": 15,
"count": 6,
"avg__C": 11,
"%SUM(value1)": 0.15,
"%avg__B": 0.2,
@ -228,8 +231,8 @@ class TableVizTestCase(SupersetTestCase):
{
"groupA": "B",
"groupB": "x",
"count": 7,
"SUM(value1)": 20,
"count": 7,
"avg__C": 22,
"%SUM(value1)": 0.2,
"%avg__B": 0.4,
@ -237,8 +240,8 @@ class TableVizTestCase(SupersetTestCase):
{
"groupA": "C",
"groupB": "y",
"count": 8,
"SUM(value1)": 25,
"count": 8,
"avg__C": 33,
"%SUM(value1)": 0.25,
"%avg__B": 0.1,
@ -246,10 +249,10 @@ class TableVizTestCase(SupersetTestCase):
{
"groupA": "C",
"groupB": "z",
"count": 9,
"SUM(value1)": 40,
"count": 9,
"avg__C": 44,
"%SUM(value1)": 0.40,
"%SUM(value1)": 0.4,
"%avg__B": 0.3,
},
]