From 9f7466ef906bbd68b41b5ada1af23f3f2bf47535 Mon Sep 17 00:00:00 2001 From: John Bodley <4567245+john-bodley@users.noreply.github.com> Date: Sun, 16 Feb 2020 22:51:35 -0800 Subject: [PATCH] [fix] Fix table viz column order (#9122) --- superset/viz.py | 75 +++++++++++++++++++++++--------------------- tests/druid_tests.py | 4 +-- tests/viz_tests.py | 71 +++++++++++++++++++++-------------------- 3 files changed, 79 insertions(+), 71 deletions(-) diff --git a/superset/viz.py b/superset/viz.py index 0b0a6ac0a..fee3fbd40 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -30,7 +30,6 @@ import re import uuid from collections import defaultdict, OrderedDict from datetime import datetime, timedelta -from functools import reduce from itertools import product from typing import Any, Dict, List, Optional, Set, Tuple, TYPE_CHECKING @@ -532,11 +531,13 @@ class TableViz(BaseViz): d = super().query_obj() fd = self.form_data - if fd.get("all_columns") and (fd.get("groupby") or fd.get("metrics")): + if fd.get("all_columns") and ( + fd.get("groupby") or fd.get("metrics") or fd.get("percent_metrics") + ): raise Exception( _( - "Choose either fields to [Group By] and [Metrics] or " - "[Columns], not both" + "Choose either fields to [Group By] and [Metrics] and/or " + "[Percentage Metrics], or [Columns], not both" ) ) @@ -554,46 +555,50 @@ class TableViz(BaseViz): # Add all percent metrics that are not already in the list if "percent_metrics" in fd: - d["metrics"] = d["metrics"] + list( - filter(lambda m: m not in d["metrics"], fd["percent_metrics"] or []) + d["metrics"].extend( + m for m in fd["percent_metrics"] or [] if m not in d["metrics"] ) d["is_timeseries"] = self.should_be_timeseries() return d def get_data(self, df: pd.DataFrame) -> VizData: - fd = self.form_data - if not self.should_be_timeseries() and df is not None and DTTM_ALIAS in df: + """ + Transform the query result to the table representation. + + :param df: The interim dataframe + :returns: The table visualization data + + The interim dataframe comprises of the group-by and non-group-by columns and + the union of the metrics representing the non-percent and percent metrics. Note + the percent metrics have yet to be transformed. + """ + + if not self.should_be_timeseries() and DTTM_ALIAS in df: del df[DTTM_ALIAS] - # Sum up and compute percentages for all percent metrics - percent_metrics = fd.get("percent_metrics") or [] - percent_metrics = [utils.get_metric_name(m) for m in percent_metrics] + # Transform the data frame to adhere to the UI ordering of the columns and + # metrics whilst simultaneously computing the percentages (via normalization) + # for the percent metrics. + non_percent_metric_columns = ( + self.form_data.get("all_columns") or self.form_data.get("groupby") or [] + ) + utils.get_metric_names(self.form_data.get("metrics") or []) - if len(percent_metrics): - percent_metrics = list(filter(lambda m: m in df, percent_metrics)) - metric_sums = { - m: reduce(lambda a, b: a + b, df[m]) for m in percent_metrics - } - metric_percents = { - m: list( - map( - lambda a: None if metric_sums[m] == 0 else a / metric_sums[m], - df[m], - ) - ) - for m in percent_metrics - } - for m in percent_metrics: - m_name = "%" + m - df[m_name] = pd.Series(metric_percents[m], name=m_name) - # Remove metrics that are not in the main metrics list - metrics = fd.get("metrics") or [] - metrics = [utils.get_metric_name(m) for m in metrics] - for m in filter( - lambda m: m not in metrics and m in df.columns, percent_metrics - ): - del df[m] + percent_metric_columns = utils.get_metric_names( + self.form_data.get("percent_metrics") or [] + ) + + df = pd.concat( + [ + df[non_percent_metric_columns], + ( + df[percent_metric_columns] + .div(df[percent_metric_columns].sum()) + .add_prefix("%") + ), + ], + axis=1, + ) data = self.handle_js_int_overflow( dict(records=df.to_dict(orient="records"), columns=list(df.columns)) diff --git a/tests/druid_tests.py b/tests/druid_tests.py index 4a8fe5387..c0d18340f 100644 --- a/tests/druid_tests.py +++ b/tests/druid_tests.py @@ -90,12 +90,12 @@ GB_RESULT_SET = [ { "version": "v1", "timestamp": "2012-01-01T00:00:00.000Z", - "event": {"dim1": "Canada", "dim2": "boy", "metric1": 12345678}, + "event": {"dim1": "Canada", "dim2": "boy", "count": 12345678}, }, { "version": "v1", "timestamp": "2012-01-01T00:00:00.000Z", - "event": {"dim1": "USA", "dim2": "girl", "metric1": 12345678 / 2}, + "event": {"dim1": "USA", "dim2": "girl", "count": 12345678 / 2}, }, ] diff --git a/tests/viz_tests.py b/tests/viz_tests.py index d5bf6f3ce..a86b2a8a9 100644 --- a/tests/viz_tests.py +++ b/tests/viz_tests.py @@ -169,15 +169,7 @@ class BaseVizTestCase(SupersetTestCase): class TableVizTestCase(SupersetTestCase): def test_get_data_applies_percentage(self): form_data = { - "percent_metrics": [ - { - "expressionType": "SIMPLE", - "aggregate": "SUM", - "label": "SUM(value1)", - "column": {"column_name": "value1", "type": "DOUBLE"}, - }, - "avg__B", - ], + "groupby": ["groupA", "groupB"], "metrics": [ { "expressionType": "SIMPLE", @@ -188,39 +180,50 @@ class TableVizTestCase(SupersetTestCase): "count", "avg__C", ], + "percent_metrics": [ + { + "expressionType": "SIMPLE", + "aggregate": "SUM", + "label": "SUM(value1)", + "column": {"column_name": "value1", "type": "DOUBLE"}, + }, + "avg__B", + ], } datasource = self.get_datasource_mock() - raw = {} - raw["SUM(value1)"] = [15, 20, 25, 40] - raw["avg__B"] = [10, 20, 5, 15] - raw["avg__C"] = [11, 22, 33, 44] - raw["count"] = [6, 7, 8, 9] - raw["groupA"] = ["A", "B", "C", "C"] - raw["groupB"] = ["x", "x", "y", "z"] - df = pd.DataFrame(raw) + + df = pd.DataFrame( + { + "SUM(value1)": [15, 20, 25, 40], + "avg__B": [10, 20, 5, 15], + "avg__C": [11, 22, 33, 44], + "count": [6, 7, 8, 9], + "groupA": ["A", "B", "C", "C"], + "groupB": ["x", "x", "y", "z"], + } + ) + test_viz = viz.TableViz(datasource, form_data) data = test_viz.get_data(df) # Check method correctly transforms data and computes percents self.assertEqual( - set( - [ - "groupA", - "groupB", - "count", - "SUM(value1)", - "avg__C", - "%SUM(value1)", - "%avg__B", - ] - ), - set(data["columns"]), + [ + "groupA", + "groupB", + "SUM(value1)", + "count", + "avg__C", + "%SUM(value1)", + "%avg__B", + ], + list(data["columns"]), ) expected = [ { "groupA": "A", "groupB": "x", - "count": 6, "SUM(value1)": 15, + "count": 6, "avg__C": 11, "%SUM(value1)": 0.15, "%avg__B": 0.2, @@ -228,8 +231,8 @@ class TableVizTestCase(SupersetTestCase): { "groupA": "B", "groupB": "x", - "count": 7, "SUM(value1)": 20, + "count": 7, "avg__C": 22, "%SUM(value1)": 0.2, "%avg__B": 0.4, @@ -237,8 +240,8 @@ class TableVizTestCase(SupersetTestCase): { "groupA": "C", "groupB": "y", - "count": 8, "SUM(value1)": 25, + "count": 8, "avg__C": 33, "%SUM(value1)": 0.25, "%avg__B": 0.1, @@ -246,10 +249,10 @@ class TableVizTestCase(SupersetTestCase): { "groupA": "C", "groupB": "z", - "count": 9, "SUM(value1)": 40, + "count": 9, "avg__C": 44, - "%SUM(value1)": 0.40, + "%SUM(value1)": 0.4, "%avg__B": 0.3, }, ]