From a2d69ea25205447946ca2620b3a35e727e1eab6c Mon Sep 17 00:00:00 2001
From: rijojoseph07
Date: Wed, 30 Jun 2021 15:35:22 +0530
Subject: [PATCH] feat(trino): add support for query cost estimate #15166 (#15177)

* added estimate_statement_cost to trino

* file formatted

Co-authored-by: rijojoseph01
---
Review revisions to the added methods: the estimate_statement_cost docstring
no longer lists parameters the method does not take; humanize() shows
null/Infinity values verbatim and moves to the next prefix at exactly 1000;
a null "estimate" is treated as empty. Hunk and diffstat counts are updated;
the post-image blob id on the index line has not been recomputed.

 superset/db_engine_specs/trino.py | 91 ++++++++++++++++++++++++++++++-
 1 file changed, 90 insertions(+), 1 deletion(-)

diff --git a/superset/db_engine_specs/trino.py b/superset/db_engine_specs/trino.py
index 7c60e6228..9218f111a 100644
--- a/superset/db_engine_specs/trino.py
+++ b/superset/db_engine_specs/trino.py
@@ -15,9 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 from datetime import datetime
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
 from urllib import parse
 
+import simplejson as json
 from sqlalchemy.engine.url import make_url, URL
 
 from superset.db_engine_specs.base import BaseEngineSpec
@@ -103,3 +104,91 @@ class TrinoEngineSpec(BaseEngineSpec):
         :param username: Effective username
         """
         # Do nothing and let update_impersonation_config take care of impersonation
+
+    @classmethod
+    def get_allow_cost_estimate(cls, extra: Dict[str, Any]) -> bool:
+        """
+        Report whether query cost estimation is available.
+
+        :param extra: Database extra configuration (not consulted)
+        :return: Always True
+        """
+        return True
+
+    @classmethod
+    def estimate_statement_cost(  # pylint: disable=too-many-locals
+        cls, statement: str, cursor: Any
+    ) -> Dict[str, Any]:
+        """
+        Run a SQL query that estimates the cost of a given statement.
+
+        :param statement: A single SQL statement
+        :param cursor: Cursor instance
+        :return: JSON response from Trino
+        """
+        sql = f"EXPLAIN (TYPE IO, FORMAT JSON) {statement}"
+        cursor.execute(sql)
+
+        # the output from Trino is a single column and a single row containing
+        # JSON:
+        #
+        #   {
+        #     ...
+        #     "estimate" : {
+        #       "outputRowCount" : 8.73265878E8,
+        #       "outputSizeInBytes" : 3.41425774958E11,
+        #       "cpuCost" : 3.41425774958E11,
+        #       "maxMemory" : 0.0,
+        #       "networkCost" : 3.41425774958E11
+        #     }
+        #   }
+        result = json.loads(cursor.fetchone()[0])
+        return result
+
+    @classmethod
+    def query_cost_formatter(
+        cls, raw_cost: List[Dict[str, Any]]
+    ) -> List[Dict[str, str]]:
+        """
+        Format cost estimate.
+
+        :param raw_cost: JSON estimate from Trino
+        :return: Human readable cost estimate
+        """
+
+        def humanize(value: Any, suffix: str) -> str:
+            # render an integer with an SI prefix, e.g. 1500 -> "1 K"
+            try:
+                value = int(value)
+            except (TypeError, ValueError, OverflowError):
+                # null, non-numeric text, NaN and +/-Infinity cannot be scaled;
+                # show them verbatim instead of failing the whole estimate
+                return str(value)
+
+            prefix = ""
+            for next_prefix in ("K", "M", "G", "T", "P", "E", "Z", "Y"):
+                if value < 1000:
+                    break
+                prefix = next_prefix
+                value //= 1000
+
+            return f"{value} {prefix}{suffix}"
+
+        cost = []
+        columns = [
+            ("outputRowCount", "Output count", " rows"),
+            ("outputSizeInBytes", "Output size", "B"),
+            ("cpuCost", "CPU cost", ""),
+            ("maxMemory", "Max memory", "B"),
+            ("networkCost", "Network cost", ""),
+        ]
+        for row in raw_cost:
+            # a missing or null "estimate" yields an empty entry
+            estimate: Dict[str, Any] = row.get("estimate") or {}
+            statement_cost = {}
+            for key, label, suffix in columns:
+                if key in estimate:
+                    statement_cost[label] = humanize(estimate[key], suffix).strip()
+            cost.append(statement_cost)
+
+        return cost