diff --git a/docs/sqllab.rst b/docs/sqllab.rst index 39ae4d4d7..efb5116f3 100644 --- a/docs/sqllab.rst +++ b/docs/sqllab.rst @@ -123,6 +123,36 @@ database configuration: Here, "version" should be the version of your Presto cluster. Support for this functionality was introduced in Presto 0.319. +You also need to enable the feature flag in your ``superset_config.py``, and you +can optionally specify a custom formatter. E.g.: + +.. code-block:: python + + def presto_query_cost_formatter(cost_estimate: List[Dict[str, float]]) -> List[Dict[str, str]]: + """ + Format cost estimate returned by Presto. + + :param cost_estimate: JSON estimate from Presto + :return: Human readable cost estimate + """ + # Convert cost to dollars based on CPU and network cost. These coefficients are just + # examples; they need to be estimated based on your infrastructure. + cpu_coefficient = 2e-12 + network_coefficient = 1e-12 + + cost = 0 + for row in cost_estimate: + cost += row.get("cpuCost", 0) * cpu_coefficient + cost += row.get("networkCost", 0) * network_coefficient + + return [{"Cost": f"US$ {cost:.2f}"}] + + + DEFAULT_FEATURE_FLAGS = { + "ESTIMATE_QUERY_COST": True, + "QUERY_COST_FORMATTERS_BY_ENGINE": {"presto": presto_query_cost_formatter}, + } + .. _ref_ctas_engine_config: Create Table As (CTAS) diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index 72ec4d767..7d81d57d8 100644 --- a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -674,7 +674,7 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods @classmethod def estimate_statement_cost( cls, statement: str, database, cursor, user_name: str - ) -> Dict[str, str]: + ) -> Dict[str, Any]: """ Generate a SQL query that estimates the cost of a given statement. 
@@ -682,6 +682,19 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods :param database: Database instance :param cursor: Cursor instance :param username: Effective username + :return: Dictionary with different costs + """ + raise Exception("Database does not support cost estimation") + + @classmethod + def query_cost_formatter( + cls, raw_cost: List[Dict[str, Any]] + ) -> List[Dict[str, str]]: + """ + Format cost estimate. + + :param raw_cost: Raw estimate from `estimate_query_cost` + :return: Human readable cost estimate """ raise Exception("Database does not support cost estimation") diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py index a3569371b..9f91969d0 100644 --- a/superset/db_engine_specs/presto.py +++ b/superset/db_engine_specs/presto.py @@ -443,14 +443,15 @@ class PrestoEngineSpec(BaseEngineSpec): @classmethod def estimate_statement_cost( # pylint: disable=too-many-locals cls, statement: str, database, cursor, user_name: str - ) -> Dict[str, str]: + ) -> Dict[str, float]: """ - Generate a SQL query that estimates the cost of a given statement. + Run a SQL query that estimates the cost of a given statement. :param statement: A single SQL statement :param database: Database instance :param cursor: Cursor instance :param username: Effective username + :return: JSON estimate from Presto """ parsed_query = ParsedQuery(statement) sql = parsed_query.stripped() @@ -476,7 +477,18 @@ class PrestoEngineSpec(BaseEngineSpec): # } # } result = json.loads(cursor.fetchone()[0]) - estimate = result["estimate"] + return result["estimate"] + + @classmethod + def query_cost_formatter( + cls, raw_cost: List[Dict[str, float]] + ) -> List[Dict[str, str]]: + """ + Format cost estimate. 
+ + :param raw_cost: JSON estimate from Presto + :return: Human readable cost estimate + """ def humanize(value: Any, suffix: str) -> str: try: @@ -493,7 +505,7 @@ class PrestoEngineSpec(BaseEngineSpec): return f"{value} {prefix}{suffix}" - cost = {} + cost = [] columns = [ ("outputRowCount", "Output count", " rows"), ("outputSizeInBytes", "Output size", "B"), @@ -501,9 +513,12 @@ class PrestoEngineSpec(BaseEngineSpec): ("maxMemory", "Max memory", "B"), ("networkCost", "Network cost", ""), ] - for key, label, suffix in columns: - if key in estimate: - cost[label] = humanize(estimate[key], suffix) + for row in raw_cost: + statement_cost = {} + for key, label, suffix in columns: + if key in row: + statement_cost[label] = humanize(row[key], suffix).strip() + cost.append(statement_cost) return cost diff --git a/superset/views/core.py b/superset/views/core.py index c431e6bae..e15964105 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -2406,6 +2406,15 @@ class Superset(BaseSupersetView): except Exception as e: return json_error_response(str(e)) + spec = mydb.db_engine_spec + query_cost_formatters = get_feature_flags().get( + "QUERY_COST_FORMATTERS_BY_ENGINE", {} + ) + query_cost_formatter = query_cost_formatters.get( + spec.engine, spec.query_cost_formatter + ) + cost = query_cost_formatter(cost) + return json_success(json.dumps(cost)) @expose("/theme/")