Allow user to customize query cost estimate (#8470)

* Allow user to customize query estimate * Add docs; run black * Update docs with types
2019-11-04 11:08:00 -08:00 · 2019-11-04 11:08:00 -08:00 · 338a2b1a51
parent 9a29116d6b
commit 338a2b1a51
4 changed files with 75 additions and 8 deletions
--- a/docs/sqllab.rst
+++ b/docs/sqllab.rst
@ -123,6 +123,36 @@ database configuration:
 Here, "version" should be the version of your Presto cluster. Support for this
 functionality was introduced in Presto 0.319.

+You also need to enable the `ESTIMATE_QUERY_COST` feature flag in your
+`superset_config.py`, and you can optionally specify a custom formatter. E.g.:
+
+.. code-block:: python
+
+    def presto_query_cost_formatter(cost_estimate: List[Dict[str, float]]) -> List[Dict[str, str]]:
+        """
+        Format cost estimate returned by Presto.
+
+        :param cost_estimate: JSON estimate from Presto
+        :return: Human readable cost estimate
+        """
+        # Convert cost to dollars based on CPU and network cost. These coefficients are just
+        # examples, they need to be estimated based on your infrastructure.
+        cpu_coefficient = 2e-12
+        network_coefficient = 1e-12
+
+        cost = 0
+        for row in cost_estimate:
+            cost += row.get("cpuCost", 0) * cpu_coefficient
+            cost += row.get("networkCost", 0) * network_coefficient
+
+        return [{"Cost": f"US$ {cost:.2f}"}]
+
+
+    DEFAULT_FEATURE_FLAGS = {
+        "ESTIMATE_QUERY_COST": True,
+        "QUERY_COST_FORMATTERS_BY_ENGINE": {"presto": presto_query_cost_formatter},
+    }
+
 .. _ref_ctas_engine_config:

 Create Table As (CTAS)
--- a/superset/db_engine_specs/base.py
+++ b/superset/db_engine_specs/base.py
@ -674,7 +674,7 @@ class BaseEngineSpec:  # pylint: disable=too-many-public-methods
    @classmethod
    def estimate_statement_cost(
        cls, statement: str, database, cursor, user_name: str
-    ) -> Dict[str, str]:
+    ) -> Dict[str, Any]:
        """
        Generate a SQL query that estimates the cost of a given statement.

@ -682,6 +682,19 @@ class BaseEngineSpec:  # pylint: disable=too-many-public-methods
        :param database: Database instance
        :param cursor: Cursor instance
        :param user_name: Effective username
+        :return: Dictionary with different costs
+        """
+        raise Exception("Database does not support cost estimation")
+
+    @classmethod
+    def query_cost_formatter(
+        cls, raw_cost: List[Dict[str, Any]]
+    ) -> List[Dict[str, str]]:
+        """
+        Format cost estimate.
+
+        :param raw_cost: Raw estimate from `estimate_query_cost`
+        :return: Human readable cost estimate
        """
        raise Exception("Database does not support cost estimation")

--- a/superset/db_engine_specs/presto.py
+++ b/superset/db_engine_specs/presto.py
@ -443,14 +443,15 @@ class PrestoEngineSpec(BaseEngineSpec):
    @classmethod
    def estimate_statement_cost(  # pylint: disable=too-many-locals
        cls, statement: str, database, cursor, user_name: str
-    ) -> Dict[str, str]:
+    ) -> Dict[str, float]:
        """
-        Generate a SQL query that estimates the cost of a given statement.
+        Run a SQL query that estimates the cost of a given statement.

        :param statement: A single SQL statement
        :param database: Database instance
        :param cursor: Cursor instance
        :param user_name: Effective username
+        :return: JSON estimate from Presto
        """
        parsed_query = ParsedQuery(statement)
        sql = parsed_query.stripped()
@ -476,7 +477,18 @@ class PrestoEngineSpec(BaseEngineSpec):
        #     }
        #   }
        result = json.loads(cursor.fetchone()[0])
-        estimate = result["estimate"]
+        return result["estimate"]
+
+    @classmethod
+    def query_cost_formatter(
+        cls, raw_cost: List[Dict[str, float]]
+    ) -> List[Dict[str, str]]:
+        """
+        Format cost estimate.
+
+        :param raw_cost: JSON estimate from Presto
+        :return: Human readable cost estimate
+        """

        def humanize(value: Any, suffix: str) -> str:
            try:
@ -493,7 +505,7 @@ class PrestoEngineSpec(BaseEngineSpec):

            return f"{value} {prefix}{suffix}"

-        cost = {}
+        cost = []
        columns = [
            ("outputRowCount", "Output count", " rows"),
            ("outputSizeInBytes", "Output size", "B"),
@ -501,9 +513,12 @@ class PrestoEngineSpec(BaseEngineSpec):
            ("maxMemory", "Max memory", "B"),
            ("networkCost", "Network cost", ""),
        ]
-        for key, label, suffix in columns:
-            if key in estimate:
-                cost[label] = humanize(estimate[key], suffix)
+        for row in raw_cost:
+            statement_cost = {}
+            for key, label, suffix in columns:
+                if key in row:
+                    statement_cost[label] = humanize(row[key], suffix).strip()
+            cost.append(statement_cost)

        return cost

--- a/superset/views/core.py
+++ b/superset/views/core.py
@ -2406,6 +2406,15 @@ class Superset(BaseSupersetView):
        except Exception as e:
            return json_error_response(str(e))

+        spec = mydb.db_engine_spec
+        query_cost_formatters = get_feature_flags().get(
+            "QUERY_COST_FORMATTERS_BY_ENGINE", {}
+        )
+        query_cost_formatter = query_cost_formatters.get(
+            spec.engine, spec.query_cost_formatter
+        )
+        cost = query_cost_formatter(cost)
+
        return json_success(json.dumps(cost))

    @expose("/theme/")