Allow user to customize query cost estimate (#8470)

* Allow user to customize query estimate

* Add docs; run black

* Update docs with types
This commit is contained in:
Beto Dealmeida 2019-11-04 11:08:00 -08:00 committed by GitHub
parent 9a29116d6b
commit 338a2b1a51
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 75 additions and 8 deletions

View File

@ -123,6 +123,36 @@ database configuration:
Here, "version" should be the version of your Presto cluster. Support for this
functionality was introduced in Presto 0.319.
You also need to enable the feature flag in your `superset_config.py`, and you
can optionally specify a custom formatter. For example:
.. code-block:: python
def presto_query_cost_formatter(cost_estimate: List[Dict[str, float]]) -> List[Dict[str, str]]:
    """
    Turn the cost estimate returned by Presto into a single dollar amount.

    :param cost_estimate: JSON estimate from Presto, one dictionary per row
    :return: Human readable cost estimate, as a one-element list whose
        "Cost" entry holds the total formatted in US dollars
    """
    # Dollars per unit of each Presto cost metric. These coefficients are just
    # examples, they need to be estimated based on your infrastructure.
    dollars_per_unit = (
        ("cpuCost", 2e-12),
        ("networkCost", 1e-12),
    )

    total = 0
    for entry in cost_estimate:
        # A metric that is missing from a row contributes nothing to the total.
        for metric, rate in dollars_per_unit:
            total += entry.get(metric, 0) * rate

    return [{"Cost": f"US$ {total:.2f}"}]
# Enable query cost estimation and register the custom formatter. The
# formatter mapping is keyed by engine name ("presto" here).
DEFAULT_FEATURE_FLAGS = {
"ESTIMATE_QUERY_COST": True,
"QUERY_COST_FORMATTERS_BY_ENGINE": {"presto": presto_query_cost_formatter},
}
.. _ref_ctas_engine_config:
Create Table As (CTAS)

View File

@ -674,7 +674,7 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
@classmethod
def estimate_statement_cost(
cls, statement: str, database, cursor, user_name: str
) -> Dict[str, str]:
) -> Dict[str, Any]:
"""
Generate a SQL query that estimates the cost of a given statement.
@ -682,6 +682,19 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
:param database: Database instance
:param cursor: Cursor instance
:param username: Effective username
:return: Dictionary with different costs
"""
raise Exception("Database does not support cost estimation")
@classmethod
def query_cost_formatter(
cls, raw_cost: List[Dict[str, Any]]
) -> List[Dict[str, str]]:
"""
Format cost estimate.

This base implementation performs no formatting and always raises.

:param raw_cost: Raw estimate from `estimate_query_cost`
:return: Human readable cost estimate
:raises Exception: Always, in this base implementation
"""
raise Exception("Database does not support cost estimation")

View File

@ -443,14 +443,15 @@ class PrestoEngineSpec(BaseEngineSpec):
@classmethod
def estimate_statement_cost( # pylint: disable=too-many-locals
cls, statement: str, database, cursor, user_name: str
) -> Dict[str, str]:
) -> Dict[str, float]:
"""
Generate a SQL query that estimates the cost of a given statement.
Run a SQL query that estimates the cost of a given statement.
:param statement: A single SQL statement
:param database: Database instance
:param cursor: Cursor instance
:param username: Effective username
:return: JSON estimate from Presto
"""
parsed_query = ParsedQuery(statement)
sql = parsed_query.stripped()
@ -476,7 +477,18 @@ class PrestoEngineSpec(BaseEngineSpec):
# }
# }
result = json.loads(cursor.fetchone()[0])
estimate = result["estimate"]
return result["estimate"]
@classmethod
def query_cost_formatter(
cls, raw_cost: List[Dict[str, float]]
) -> List[Dict[str, str]]:
"""
Format cost estimate.
:param raw_cost: JSON estimate from Presto
:return: Human readable cost estimate
"""
def humanize(value: Any, suffix: str) -> str:
try:
@ -493,7 +505,7 @@ class PrestoEngineSpec(BaseEngineSpec):
return f"{value} {prefix}{suffix}"
cost = {}
cost = []
columns = [
("outputRowCount", "Output count", " rows"),
("outputSizeInBytes", "Output size", "B"),
@ -501,9 +513,12 @@ class PrestoEngineSpec(BaseEngineSpec):
("maxMemory", "Max memory", "B"),
("networkCost", "Network cost", ""),
]
for key, label, suffix in columns:
if key in estimate:
cost[label] = humanize(estimate[key], suffix)
for row in raw_cost:
statement_cost = {}
for key, label, suffix in columns:
if key in row:
statement_cost[label] = humanize(row[key], suffix).strip()
cost.append(statement_cost)
return cost

View File

@ -2406,6 +2406,15 @@ class Superset(BaseSupersetView):
except Exception as e:
return json_error_response(str(e))
spec = mydb.db_engine_spec
query_cost_formatters = get_feature_flags().get(
"QUERY_COST_FORMATTERS_BY_ENGINE", {}
)
query_cost_formatter = query_cost_formatters.get(
spec.engine, spec.query_cost_formatter
)
cost = query_cost_formatter(cost)
return json_success(json.dumps(cost))
@expose("/theme/")