Allow user to customize query cost estimate (#8470)
* Allow user to customize query estimate * Add docs; run black * Update docs with types
This commit is contained in:
parent
9a29116d6b
commit
338a2b1a51
|
|
@ -123,6 +123,36 @@ database configuration:
|
|||
Here, "version" should be the version of your Presto cluster. Support for this
|
||||
functionality was introduced in Presto 0.319.
|
||||
|
||||
You also need to enable the feature flag in your `superset_config.py`, and you
|
||||
can optionally specify a custom formatter. For example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
def presto_query_cost_formatter(cost_estimate: List[Dict[str, float]]) -> List[Dict[str, str]]:
|
||||
"""
|
||||
Format cost estimate returned by Presto.
|
||||
|
||||
:param cost_estimate: JSON estimate from Presto
|
||||
:return: Human readable cost estimate
|
||||
"""
|
||||
# Convert cost to dollars based on CPU and network cost. These coefficients are just
|
||||
# examples; they need to be estimated based on your infrastructure.
|
||||
cpu_coefficient = 2e-12
|
||||
network_coefficient = 1e-12
|
||||
|
||||
cost = 0
|
||||
for row in cost_estimate:
|
||||
cost += row.get("cpuCost", 0) * cpu_coefficient
|
||||
cost += row.get("networkCost", 0) * network_coefficient
|
||||
|
||||
return [{"Cost": f"US$ {cost:.2f}"}]
|
||||
|
||||
|
||||
DEFAULT_FEATURE_FLAGS = {
|
||||
"ESTIMATE_QUERY_COST": True,
|
||||
"QUERY_COST_FORMATTERS_BY_ENGINE": {"presto": presto_query_cost_formatter},
|
||||
}
|
||||
|
||||
.. _ref_ctas_engine_config:
|
||||
|
||||
Create Table As (CTAS)
|
||||
|
|
|
|||
|
|
@ -674,7 +674,7 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
|||
@classmethod
|
||||
def estimate_statement_cost(
|
||||
cls, statement: str, database, cursor, user_name: str
|
||||
) -> Dict[str, str]:
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Generate a SQL query that estimates the cost of a given statement.
|
||||
|
||||
|
|
@ -682,6 +682,19 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
|||
:param database: Database instance
|
||||
:param cursor: Cursor instance
|
||||
:param user_name: Effective username
|
||||
:return: Dictionary with different costs
|
||||
"""
|
||||
raise Exception("Database does not support cost estimation")
|
||||
|
||||
@classmethod
|
||||
def query_cost_formatter(
|
||||
cls, raw_cost: List[Dict[str, Any]]
|
||||
) -> List[Dict[str, str]]:
|
||||
"""
|
||||
Format cost estimate.
|
||||
|
||||
:param raw_cost: Raw estimate from `estimate_query_cost`
|
||||
:return: Human readable cost estimate
|
||||
"""
|
||||
raise Exception("Database does not support cost estimation")
|
||||
|
||||
|
|
|
|||
|
|
@ -443,14 +443,15 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
@classmethod
|
||||
def estimate_statement_cost( # pylint: disable=too-many-locals
|
||||
cls, statement: str, database, cursor, user_name: str
|
||||
) -> Dict[str, str]:
|
||||
) -> Dict[str, float]:
|
||||
"""
|
||||
Generate a SQL query that estimates the cost of a given statement.
|
||||
Run a SQL query that estimates the cost of a given statement.
|
||||
|
||||
:param statement: A single SQL statement
|
||||
:param database: Database instance
|
||||
:param cursor: Cursor instance
|
||||
:param user_name: Effective username
|
||||
:return: JSON estimate from Presto
|
||||
"""
|
||||
parsed_query = ParsedQuery(statement)
|
||||
sql = parsed_query.stripped()
|
||||
|
|
@ -476,7 +477,18 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
# }
|
||||
# }
|
||||
result = json.loads(cursor.fetchone()[0])
|
||||
estimate = result["estimate"]
|
||||
return result["estimate"]
|
||||
|
||||
@classmethod
|
||||
def query_cost_formatter(
|
||||
cls, raw_cost: List[Dict[str, float]]
|
||||
) -> List[Dict[str, str]]:
|
||||
"""
|
||||
Format cost estimate.
|
||||
|
||||
:param raw_cost: JSON estimate from Presto
|
||||
:return: Human readable cost estimate
|
||||
"""
|
||||
|
||||
def humanize(value: Any, suffix: str) -> str:
|
||||
try:
|
||||
|
|
@ -493,7 +505,7 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
|
||||
return f"{value} {prefix}{suffix}"
|
||||
|
||||
cost = {}
|
||||
cost = []
|
||||
columns = [
|
||||
("outputRowCount", "Output count", " rows"),
|
||||
("outputSizeInBytes", "Output size", "B"),
|
||||
|
|
@ -501,9 +513,12 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
("maxMemory", "Max memory", "B"),
|
||||
("networkCost", "Network cost", ""),
|
||||
]
|
||||
for key, label, suffix in columns:
|
||||
if key in estimate:
|
||||
cost[label] = humanize(estimate[key], suffix)
|
||||
for row in raw_cost:
|
||||
statement_cost = {}
|
||||
for key, label, suffix in columns:
|
||||
if key in row:
|
||||
statement_cost[label] = humanize(row[key], suffix).strip()
|
||||
cost.append(statement_cost)
|
||||
|
||||
return cost
|
||||
|
||||
|
|
|
|||
|
|
@ -2406,6 +2406,15 @@ class Superset(BaseSupersetView):
|
|||
except Exception as e:
|
||||
return json_error_response(str(e))
|
||||
|
||||
spec = mydb.db_engine_spec
|
||||
query_cost_formatters = get_feature_flags().get(
|
||||
"QUERY_COST_FORMATTERS_BY_ENGINE", {}
|
||||
)
|
||||
query_cost_formatter = query_cost_formatters.get(
|
||||
spec.engine, spec.query_cost_formatter
|
||||
)
|
||||
cost = query_cost_formatter(cost)
|
||||
|
||||
return json_success(json.dumps(cost))
|
||||
|
||||
@expose("/theme/")
|
||||
|
|
|
|||
Loading…
Reference in New Issue