[fix] Fix table viz column order (#9122)

2020-02-16 22:51:35 -08:00 · 2020-02-16 22:51:35 -08:00 · 9f7466ef90
parent a7e433a512
commit 9f7466ef90
3 changed files with 79 additions and 71 deletions
--- a/superset/viz.py
+++ b/superset/viz.py
@ -30,7 +30,6 @@ import re
 import uuid
 from collections import defaultdict, OrderedDict
 from datetime import datetime, timedelta
-from functools import reduce
 from itertools import product
 from typing import Any, Dict, List, Optional, Set, Tuple, TYPE_CHECKING

@ -532,11 +531,13 @@ class TableViz(BaseViz):
        d = super().query_obj()
        fd = self.form_data

-        if fd.get("all_columns") and (fd.get("groupby") or fd.get("metrics")):
+        if fd.get("all_columns") and (
+            fd.get("groupby") or fd.get("metrics") or fd.get("percent_metrics")
+        ):
            raise Exception(
                _(
-                    "Choose either fields to [Group By] and [Metrics] or "
-                    "[Columns], not both"
+                    "Choose either fields to [Group By] and [Metrics] and/or "
+                    "[Percentage Metrics], or [Columns], not both"
                )
            )

@ -554,46 +555,50 @@ class TableViz(BaseViz):

        # Add all percent metrics that are not already in the list
        if "percent_metrics" in fd:
-            d["metrics"] = d["metrics"] + list(
-                filter(lambda m: m not in d["metrics"], fd["percent_metrics"] or [])
+            d["metrics"].extend(
+                m for m in fd["percent_metrics"] or [] if m not in d["metrics"]
            )

        d["is_timeseries"] = self.should_be_timeseries()
        return d

    def get_data(self, df: pd.DataFrame) -> VizData:
-        fd = self.form_data
-        if not self.should_be_timeseries() and df is not None and DTTM_ALIAS in df:
+        """
+        Transform the query result to the table representation.
+
+        :param df: The interim dataframe
+        :returns: The table visualization data
+
+        The interim dataframe comprises the group-by and non-group-by columns and
+        the union of the metrics representing the non-percent and percent metrics. Note
+        the percent metrics have yet to be transformed.
+        """
+
+        if not self.should_be_timeseries() and DTTM_ALIAS in df:
            del df[DTTM_ALIAS]

-        # Sum up and compute percentages for all percent metrics
-        percent_metrics = fd.get("percent_metrics") or []
-        percent_metrics = [utils.get_metric_name(m) for m in percent_metrics]
+        # Transform the data frame to adhere to the UI ordering of the columns and
+        # metrics whilst simultaneously computing the percentages (via normalization)
+        # for the percent metrics.
+        non_percent_metric_columns = (
+            self.form_data.get("all_columns") or self.form_data.get("groupby") or []
+        ) + utils.get_metric_names(self.form_data.get("metrics") or [])

-        if len(percent_metrics):
-            percent_metrics = list(filter(lambda m: m in df, percent_metrics))
-            metric_sums = {
-                m: reduce(lambda a, b: a + b, df[m]) for m in percent_metrics
-            }
-            metric_percents = {
-                m: list(
-                    map(
-                        lambda a: None if metric_sums[m] == 0 else a / metric_sums[m],
-                        df[m],
-                    )
-                )
-                for m in percent_metrics
-            }
-            for m in percent_metrics:
-                m_name = "%" + m
-                df[m_name] = pd.Series(metric_percents[m], name=m_name)
-            # Remove metrics that are not in the main metrics list
-            metrics = fd.get("metrics") or []
-            metrics = [utils.get_metric_name(m) for m in metrics]
-            for m in filter(
-                lambda m: m not in metrics and m in df.columns, percent_metrics
-            ):
-                del df[m]
+        percent_metric_columns = utils.get_metric_names(
+            self.form_data.get("percent_metrics") or []
+        )
+
+        df = pd.concat(
+            [
+                df[non_percent_metric_columns],
+                (
+                    df[percent_metric_columns]
+                    .div(df[percent_metric_columns].sum())
+                    .add_prefix("%")
+                ),
+            ],
+            axis=1,
+        )

        data = self.handle_js_int_overflow(
            dict(records=df.to_dict(orient="records"), columns=list(df.columns))
--- a/tests/druid_tests.py
+++ b/tests/druid_tests.py
@ -90,12 +90,12 @@ GB_RESULT_SET = [
    {
        "version": "v1",
        "timestamp": "2012-01-01T00:00:00.000Z",
-        "event": {"dim1": "Canada", "dim2": "boy", "metric1": 12345678},
+        "event": {"dim1": "Canada", "dim2": "boy", "count": 12345678},
    },
    {
        "version": "v1",
        "timestamp": "2012-01-01T00:00:00.000Z",
-        "event": {"dim1": "USA", "dim2": "girl", "metric1": 12345678 / 2},
+        "event": {"dim1": "USA", "dim2": "girl", "count": 12345678 / 2},
    },
 ]

--- a/tests/viz_tests.py
+++ b/tests/viz_tests.py
@ -169,15 +169,7 @@ class BaseVizTestCase(SupersetTestCase):
 class TableVizTestCase(SupersetTestCase):
    def test_get_data_applies_percentage(self):
        form_data = {
-            "percent_metrics": [
-                {
-                    "expressionType": "SIMPLE",
-                    "aggregate": "SUM",
-                    "label": "SUM(value1)",
-                    "column": {"column_name": "value1", "type": "DOUBLE"},
-                },
-                "avg__B",
-            ],
+            "groupby": ["groupA", "groupB"],
            "metrics": [
                {
                    "expressionType": "SIMPLE",
@ -188,39 +180,50 @@ class TableVizTestCase(SupersetTestCase):
                "count",
                "avg__C",
            ],
+            "percent_metrics": [
+                {
+                    "expressionType": "SIMPLE",
+                    "aggregate": "SUM",
+                    "label": "SUM(value1)",
+                    "column": {"column_name": "value1", "type": "DOUBLE"},
+                },
+                "avg__B",
+            ],
        }
        datasource = self.get_datasource_mock()
-        raw = {}
-        raw["SUM(value1)"] = [15, 20, 25, 40]
-        raw["avg__B"] = [10, 20, 5, 15]
-        raw["avg__C"] = [11, 22, 33, 44]
-        raw["count"] = [6, 7, 8, 9]
-        raw["groupA"] = ["A", "B", "C", "C"]
-        raw["groupB"] = ["x", "x", "y", "z"]
-        df = pd.DataFrame(raw)
+
+        df = pd.DataFrame(
+            {
+                "SUM(value1)": [15, 20, 25, 40],
+                "avg__B": [10, 20, 5, 15],
+                "avg__C": [11, 22, 33, 44],
+                "count": [6, 7, 8, 9],
+                "groupA": ["A", "B", "C", "C"],
+                "groupB": ["x", "x", "y", "z"],
+            }
+        )
+
        test_viz = viz.TableViz(datasource, form_data)
        data = test_viz.get_data(df)
        # Check method correctly transforms data and computes percents
        self.assertEqual(
-            set(
-                [
-                    "groupA",
-                    "groupB",
-                    "count",
-                    "SUM(value1)",
-                    "avg__C",
-                    "%SUM(value1)",
-                    "%avg__B",
-                ]
-            ),
-            set(data["columns"]),
+            [
+                "groupA",
+                "groupB",
+                "SUM(value1)",
+                "count",
+                "avg__C",
+                "%SUM(value1)",
+                "%avg__B",
+            ],
+            list(data["columns"]),
        )
        expected = [
            {
                "groupA": "A",
                "groupB": "x",
-                "count": 6,
                "SUM(value1)": 15,
+                "count": 6,
                "avg__C": 11,
                "%SUM(value1)": 0.15,
                "%avg__B": 0.2,
@ -228,8 +231,8 @@ class TableVizTestCase(SupersetTestCase):
            {
                "groupA": "B",
                "groupB": "x",
-                "count": 7,
                "SUM(value1)": 20,
+                "count": 7,
                "avg__C": 22,
                "%SUM(value1)": 0.2,
                "%avg__B": 0.4,
@ -237,8 +240,8 @@ class TableVizTestCase(SupersetTestCase):
            {
                "groupA": "C",
                "groupB": "y",
-                "count": 8,
                "SUM(value1)": 25,
+                "count": 8,
                "avg__C": 33,
                "%SUM(value1)": 0.25,
                "%avg__B": 0.1,
@ -246,10 +249,10 @@ class TableVizTestCase(SupersetTestCase):
            {
                "groupA": "C",
                "groupB": "z",
-                "count": 9,
                "SUM(value1)": 40,
+                "count": 9,
                "avg__C": 44,
-                "%SUM(value1)": 0.40,
+                "%SUM(value1)": 0.4,
                "%avg__B": 0.3,
            },
        ]