From 9f7466ef906bbd68b41b5ada1af23f3f2bf47535 Mon Sep 17 00:00:00 2001
From: John Bodley <4567245+john-bodley@users.noreply.github.com>
Date: Sun, 16 Feb 2020 22:51:35 -0800
Subject: [PATCH] [fix] Fix table viz column order (#9122)

---
 superset/viz.py      | 75 +++++++++++++++++++++++---------------------
 tests/druid_tests.py |  4 +--
 tests/viz_tests.py   | 71 +++++++++++++++++++++--------------------
 3 files changed, 79 insertions(+), 71 deletions(-)

diff --git a/superset/viz.py b/superset/viz.py
index 0b0a6ac0a..fee3fbd40 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -30,7 +30,6 @@ import re
 import uuid
 from collections import defaultdict, OrderedDict
 from datetime import datetime, timedelta
-from functools import reduce
 from itertools import product
 from typing import Any, Dict, List, Optional, Set, Tuple, TYPE_CHECKING
 
@@ -532,11 +531,13 @@ class TableViz(BaseViz):
         d = super().query_obj()
         fd = self.form_data
 
-        if fd.get("all_columns") and (fd.get("groupby") or fd.get("metrics")):
+        if fd.get("all_columns") and (
+            fd.get("groupby") or fd.get("metrics") or fd.get("percent_metrics")
+        ):
             raise Exception(
                 _(
-                    "Choose either fields to [Group By] and [Metrics] or "
-                    "[Columns], not both"
+                    "Choose either fields to [Group By] and [Metrics] and/or "
+                    "[Percentage Metrics], or [Columns], not both"
                 )
             )
 
@@ -554,46 +555,50 @@ class TableViz(BaseViz):
 
         # Add all percent metrics that are not already in the list
         if "percent_metrics" in fd:
-            d["metrics"] = d["metrics"] + list(
-                filter(lambda m: m not in d["metrics"], fd["percent_metrics"] or [])
+            d["metrics"].extend(
+                m for m in fd["percent_metrics"] or [] if m not in d["metrics"]
             )
 
         d["is_timeseries"] = self.should_be_timeseries()
         return d
 
     def get_data(self, df: pd.DataFrame) -> VizData:
-        fd = self.form_data
-        if not self.should_be_timeseries() and df is not None and DTTM_ALIAS in df:
+        """
+        Transform the query result to the table representation.
+
+        :param df: The interim dataframe
+        :returns: The table visualization data
+
+        The interim dataframe comprises of the group-by and non-group-by columns and
+        the union of the metrics representing the non-percent and percent metrics. Note
+        the percent metrics have yet to be transformed.
+        """
+
+        if not self.should_be_timeseries() and DTTM_ALIAS in df:
             del df[DTTM_ALIAS]
 
-        # Sum up and compute percentages for all percent metrics
-        percent_metrics = fd.get("percent_metrics") or []
-        percent_metrics = [utils.get_metric_name(m) for m in percent_metrics]
+        # Transform the data frame to adhere to the UI ordering of the columns and
+        # metrics whilst simultaneously computing the percentages (via normalization)
+        # for the percent metrics.
+        non_percent_metric_columns = (
+            self.form_data.get("all_columns") or self.form_data.get("groupby") or []
+        ) + utils.get_metric_names(self.form_data.get("metrics") or [])
 
-        if len(percent_metrics):
-            percent_metrics = list(filter(lambda m: m in df, percent_metrics))
-            metric_sums = {
-                m: reduce(lambda a, b: a + b, df[m]) for m in percent_metrics
-            }
-            metric_percents = {
-                m: list(
-                    map(
-                        lambda a: None if metric_sums[m] == 0 else a / metric_sums[m],
-                        df[m],
-                    )
-                )
-                for m in percent_metrics
-            }
-            for m in percent_metrics:
-                m_name = "%" + m
-                df[m_name] = pd.Series(metric_percents[m], name=m_name)
-            # Remove metrics that are not in the main metrics list
-            metrics = fd.get("metrics") or []
-            metrics = [utils.get_metric_name(m) for m in metrics]
-            for m in filter(
-                lambda m: m not in metrics and m in df.columns, percent_metrics
-            ):
-                del df[m]
+        percent_metric_columns = utils.get_metric_names(
+            self.form_data.get("percent_metrics") or []
+        )
+
+        df = pd.concat(
+            [
+                df[non_percent_metric_columns],
+                (
+                    df[percent_metric_columns]
+                    .div(df[percent_metric_columns].sum())
+                    .add_prefix("%")
+                ),
+            ],
+            axis=1,
+        )
 
         data = self.handle_js_int_overflow(
             dict(records=df.to_dict(orient="records"), columns=list(df.columns))
diff --git a/tests/druid_tests.py b/tests/druid_tests.py
index 4a8fe5387..c0d18340f 100644
--- a/tests/druid_tests.py
+++ b/tests/druid_tests.py
@@ -90,12 +90,12 @@ GB_RESULT_SET = [
     {
         "version": "v1",
         "timestamp": "2012-01-01T00:00:00.000Z",
-        "event": {"dim1": "Canada", "dim2": "boy", "metric1": 12345678},
+        "event": {"dim1": "Canada", "dim2": "boy", "count": 12345678},
     },
     {
         "version": "v1",
         "timestamp": "2012-01-01T00:00:00.000Z",
-        "event": {"dim1": "USA", "dim2": "girl", "metric1": 12345678 / 2},
+        "event": {"dim1": "USA", "dim2": "girl", "count": 12345678 / 2},
     },
 ]
 
diff --git a/tests/viz_tests.py b/tests/viz_tests.py
index d5bf6f3ce..a86b2a8a9 100644
--- a/tests/viz_tests.py
+++ b/tests/viz_tests.py
@@ -169,15 +169,7 @@ class BaseVizTestCase(SupersetTestCase):
 class TableVizTestCase(SupersetTestCase):
     def test_get_data_applies_percentage(self):
         form_data = {
-            "percent_metrics": [
-                {
-                    "expressionType": "SIMPLE",
-                    "aggregate": "SUM",
-                    "label": "SUM(value1)",
-                    "column": {"column_name": "value1", "type": "DOUBLE"},
-                },
-                "avg__B",
-            ],
+            "groupby": ["groupA", "groupB"],
             "metrics": [
                 {
                     "expressionType": "SIMPLE",
@@ -188,39 +180,50 @@ class TableVizTestCase(SupersetTestCase):
                 "count",
                 "avg__C",
             ],
+            "percent_metrics": [
+                {
+                    "expressionType": "SIMPLE",
+                    "aggregate": "SUM",
+                    "label": "SUM(value1)",
+                    "column": {"column_name": "value1", "type": "DOUBLE"},
+                },
+                "avg__B",
+            ],
         }
         datasource = self.get_datasource_mock()
-        raw = {}
-        raw["SUM(value1)"] = [15, 20, 25, 40]
-        raw["avg__B"] = [10, 20, 5, 15]
-        raw["avg__C"] = [11, 22, 33, 44]
-        raw["count"] = [6, 7, 8, 9]
-        raw["groupA"] = ["A", "B", "C", "C"]
-        raw["groupB"] = ["x", "x", "y", "z"]
-        df = pd.DataFrame(raw)
+
+        df = pd.DataFrame(
+            {
+                "SUM(value1)": [15, 20, 25, 40],
+                "avg__B": [10, 20, 5, 15],
+                "avg__C": [11, 22, 33, 44],
+                "count": [6, 7, 8, 9],
+                "groupA": ["A", "B", "C", "C"],
+                "groupB": ["x", "x", "y", "z"],
+            }
+        )
+
         test_viz = viz.TableViz(datasource, form_data)
         data = test_viz.get_data(df)
         # Check method correctly transforms data and computes percents
         self.assertEqual(
-            set(
-                [
-                    "groupA",
-                    "groupB",
-                    "count",
-                    "SUM(value1)",
-                    "avg__C",
-                    "%SUM(value1)",
-                    "%avg__B",
-                ]
-            ),
-            set(data["columns"]),
+            [
+                "groupA",
+                "groupB",
+                "SUM(value1)",
+                "count",
+                "avg__C",
+                "%SUM(value1)",
+                "%avg__B",
+            ],
+            list(data["columns"]),
         )
         expected = [
             {
                 "groupA": "A",
                 "groupB": "x",
-                "count": 6,
                 "SUM(value1)": 15,
+                "count": 6,
                 "avg__C": 11,
                 "%SUM(value1)": 0.15,
                 "%avg__B": 0.2,
@@ -228,8 +231,8 @@ class TableVizTestCase(SupersetTestCase):
             {
                 "groupA": "B",
                 "groupB": "x",
-                "count": 7,
                 "SUM(value1)": 20,
+                "count": 7,
                 "avg__C": 22,
                 "%SUM(value1)": 0.2,
                 "%avg__B": 0.4,
@@ -237,8 +240,8 @@ class TableVizTestCase(SupersetTestCase):
             {
                 "groupA": "C",
                 "groupB": "y",
-                "count": 8,
                 "SUM(value1)": 25,
+                "count": 8,
                 "avg__C": 33,
                 "%SUM(value1)": 0.25,
                 "%avg__B": 0.1,
@@ -246,10 +249,10 @@ class TableVizTestCase(SupersetTestCase):
             {
                 "groupA": "C",
                 "groupB": "z",
-                "count": 9,
                 "SUM(value1)": 40,
+                "count": 9,
                 "avg__C": 44,
-                "%SUM(value1)": 0.40,
+                "%SUM(value1)": 0.4,
                 "%avg__B": 0.3,
             },
         ]